summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt17
-rw-r--r--src/audio_core/CMakeLists.txt3
-rw-r--r--src/audio_core/audio_renderer.cpp92
-rw-r--r--src/audio_core/audio_renderer.h6
-rw-r--r--src/audio_core/command_generator.cpp216
-rw-r--r--src/audio_core/command_generator.h32
-rw-r--r--src/audio_core/common.h2
-rw-r--r--src/audio_core/info_updater.cpp3
-rw-r--r--src/audio_core/sink_context.cpp15
-rw-r--r--src/audio_core/sink_context.h2
-rw-r--r--src/audio_core/voice_context.cpp88
-rw-r--r--src/audio_core/voice_context.h13
-rw-r--r--src/common/fs/file.cpp29
-rw-r--r--src/common/fs/file.h11
-rw-r--r--src/common/logging/backend.cpp19
-rw-r--r--src/core/CMakeLists.txt11
-rw-r--r--src/core/file_sys/patch_manager.cpp24
-rw-r--r--src/core/file_sys/patch_manager.h3
-rw-r--r--src/core/file_sys/sdmc_factory.cpp31
-rw-r--r--src/core/file_sys/sdmc_factory.h6
-rw-r--r--src/core/hle/ipc_helpers.h8
-rw-r--r--src/core/hle/service/aoc/aoc_u.cpp10
-rw-r--r--src/core/hle/service/aoc/aoc_u.h1
-rw-r--r--src/core/hle/service/audio/audren_u.cpp14
-rw-r--r--src/core/hle/service/audio/hwopus.cpp45
-rw-r--r--src/core/hle/service/audio/hwopus.h4
-rw-r--r--src/core/hle/service/filesystem/filesystem.cpp24
-rw-r--r--src/core/hle/service/filesystem/filesystem.h1
-rw-r--r--src/core/hle/service/mii/manager.cpp5
-rw-r--r--src/input_common/CMakeLists.txt19
-rwxr-xr-xsrc/input_common/analog_from_button.cpp1
-rw-r--r--src/input_common/gcadapter/gc_adapter.cpp7
-rw-r--r--src/input_common/udp/protocol.h7
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h11
-rw-r--r--src/video_core/cdma_pusher.cpp3
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp85
-rw-r--r--src/video_core/command_classes/codecs/codec.h12
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp207
-rw-r--r--src/video_core/command_classes/codecs/h264.h132
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp4
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h307
-rw-r--r--src/video_core/command_classes/nvdec.cpp17
-rw-r--r--src/video_core/command_classes/nvdec.h8
-rw-r--r--src/video_core/command_classes/nvdec_common.h103
-rw-r--r--src/video_core/command_classes/vic.cpp18
-rw-r--r--src/video_core/engines/fermi_2d.cpp22
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt5
-rw-r--r--src/video_core/memory_manager.cpp109
-rw-r--r--src/video_core/memory_manager.h22
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_base.h2
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp58
-rw-r--r--src/video_core/renderer_opengl/gl_device.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp29
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h4
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp4
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/texture_cache/image_base.cpp5
-rw-r--r--src/video_core/texture_cache/image_base.h39
-rw-r--r--src/video_core/texture_cache/texture_cache.h495
-rw-r--r--src/video_core/texture_cache/types.h1
-rw-r--r--src/video_core/texture_cache/util.cpp26
-rw-r--r--src/video_core/texture_cache/util.h4
-rw-r--r--src/video_core/textures/astc.cpp4
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp21
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h3
-rw-r--r--src/yuzu/debugger/profiler.cpp9
-rw-r--r--src/yuzu/game_list.cpp12
-rw-r--r--src/yuzu/game_list.h7
-rw-r--r--src/yuzu/main.cpp25
-rw-r--r--src/yuzu/main.h7
77 files changed, 1841 insertions, 783 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f30dd49a3..f8ec8fea8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,13 +45,23 @@ if (MSVC)
45 /Zc:inline 45 /Zc:inline
46 /Zc:throwingNew 46 /Zc:throwingNew
47 47
48 # External headers diagnostics
49 /experimental:external # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later
50 /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers
51 /external:W0 # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers
52
48 # Warnings 53 # Warnings
49 /W3 54 /W3
50 /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled 55 /we4018 # 'expression': signed/unsigned mismatch
56 /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled
51 /we4101 # 'identifier': unreferenced local variable 57 /we4101 # 'identifier': unreferenced local variable
58 /we4189 # 'identifier': local variable is initialized but not referenced
52 /we4265 # 'class': class has virtual functions, but destructor is not virtual 59 /we4265 # 'class': class has virtual functions, but destructor is not virtual
53 /we4388 # signed/unsigned mismatch 60 /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data
54 /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect 61 /we4305 # 'context': truncation from 'type1' to 'type2'
62 /we4388 # 'expression': signed/unsigned mismatch
63 /we4389 # 'operator': signed/unsigned mismatch
64 /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect
55 /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'? 65 /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
56 /we4555 # Expression has no effect; expected expression with side-effect 66 /we4555 # Expression has no effect; expected expression with side-effect
57 /we4715 # 'function': not all control paths return a value 67 /we4715 # 'function': not all control paths return a value
@@ -72,6 +82,7 @@ else()
72 -Werror=missing-declarations 82 -Werror=missing-declarations
73 -Werror=missing-field-initializers 83 -Werror=missing-field-initializers
74 -Werror=reorder 84 -Werror=reorder
85 -Werror=sign-compare
75 -Werror=switch 86 -Werror=switch
76 -Werror=uninitialized 87 -Werror=uninitialized
77 -Werror=unused-function 88 -Werror=unused-function
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
51 target_compile_options(audio_core PRIVATE 51 target_compile_options(audio_core PRIVATE
52 -Werror=conversion 52 -Werror=conversion
53 -Werror=ignored-qualifiers 53 -Werror=ignored-qualifiers
54 -Werror=implicit-fallthrough
55 -Werror=reorder
56 -Werror=sign-compare
57 -Werror=shadow 54 -Werror=shadow
58 -Werror=unused-parameter 55 -Werror=unused-parameter
59 -Werror=unused-variable 56 -Werror=unused-variable
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 80ffddb10..7dba739b4 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -12,6 +12,7 @@
12#include "audio_core/voice_context.h" 12#include "audio_core/voice_context.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "common/settings.h" 14#include "common/settings.h"
15#include "core/core_timing.h"
15#include "core/memory.h" 16#include "core/memory.h"
16 17
17namespace { 18namespace {
@@ -28,10 +29,9 @@ namespace {
28 (static_cast<float>(r_channel) * r_mix_amount))); 29 (static_cast<float>(r_channel) * r_mix_amount)));
29} 30}
30 31
31[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel, 32[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(
32 s16 fc_channel, 33 s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel,
33 [[maybe_unused]] s16 lf_channel, 34 s16 br_channel) {
34 s16 bl_channel, s16 br_channel) {
35 // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels 35 // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels
36 // are mixed to be 36.94% 36 // are mixed to be 36.94%
37 37
@@ -56,11 +56,11 @@ namespace {
56 const std::array<float_le, 4>& coeff) { 56 const std::array<float_le, 4>& coeff) {
57 const auto left = 57 const auto left =
58 static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + 58 static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
59 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0]; 59 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3];
60 60
61 const auto right = 61 const auto right =
62 static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + 62 static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] +
63 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0]; 63 static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3];
64 64
65 return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))}; 65 return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))};
66} 66}
@@ -68,7 +68,9 @@ namespace {
68} // namespace 68} // namespace
69 69
70namespace AudioCore { 70namespace AudioCore {
71AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, 71constexpr s32 NUM_BUFFERS = 2;
72
73AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_,
72 AudioCommon::AudioRendererParameter params, 74 AudioCommon::AudioRendererParameter params,
73 Stream::ReleaseCallback&& release_callback, 75 Stream::ReleaseCallback&& release_callback,
74 std::size_t instance_number) 76 std::size_t instance_number)
@@ -77,7 +79,8 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
77 sink_context(params.sink_count), splitter_context(), 79 sink_context(params.sink_count), splitter_context(),
78 voices(params.voice_count), memory{memory_}, 80 voices(params.voice_count), memory{memory_},
79 command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context, 81 command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context,
80 memory) { 82 memory),
83 core_timing{core_timing_} {
81 behavior_info.SetUserRevision(params.revision); 84 behavior_info.SetUserRevision(params.revision);
82 splitter_context.Initialize(behavior_info, params.splitter_count, 85 splitter_context.Initialize(behavior_info, params.splitter_count,
83 params.num_splitter_send_channels); 86 params.num_splitter_send_channels);
@@ -86,16 +89,27 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
86 stream = audio_out->OpenStream( 89 stream = audio_out->OpenStream(
87 core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS, 90 core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
88 fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback)); 91 fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
89 audio_out->StartStream(stream); 92 process_event = Core::Timing::CreateEvent(
90 93 fmt::format("AudioRenderer-Instance{}-Process", instance_number),
91 QueueMixedBuffer(0); 94 [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); });
92 QueueMixedBuffer(1); 95 for (s32 i = 0; i < NUM_BUFFERS; ++i) {
93 QueueMixedBuffer(2); 96 QueueMixedBuffer(i);
94 QueueMixedBuffer(3); 97 }
95} 98}
96 99
97AudioRenderer::~AudioRenderer() = default; 100AudioRenderer::~AudioRenderer() = default;
98 101
102ResultCode AudioRenderer::Start() {
103 audio_out->StartStream(stream);
104 ReleaseAndQueueBuffers();
105 return ResultSuccess;
106}
107
108ResultCode AudioRenderer::Stop() {
109 audio_out->StopStream(stream);
110 return ResultSuccess;
111}
112
99u32 AudioRenderer::GetSampleRate() const { 113u32 AudioRenderer::GetSampleRate() const {
100 return worker_params.sample_rate; 114 return worker_params.sample_rate;
101} 115}
@@ -114,7 +128,7 @@ Stream::State AudioRenderer::GetStreamState() const {
114 128
115ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params, 129ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params,
116 std::vector<u8>& output_params) { 130 std::vector<u8>& output_params) {
117 131 std::scoped_lock lock{mutex};
118 InfoUpdater info_updater{input_params, output_params, behavior_info}; 132 InfoUpdater info_updater{input_params, output_params, behavior_info};
119 133
120 if (!info_updater.UpdateBehaviorInfo(behavior_info)) { 134 if (!info_updater.UpdateBehaviorInfo(behavior_info)) {
@@ -194,9 +208,6 @@ ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_param
194 LOG_ERROR(Audio, "Audio buffers were not consumed!"); 208 LOG_ERROR(Audio, "Audio buffers were not consumed!");
195 return AudioCommon::Audren::ERR_INVALID_PARAMETERS; 209 return AudioCommon::Audren::ERR_INVALID_PARAMETERS;
196 } 210 }
197
198 ReleaseAndQueueBuffers();
199
200 return ResultSuccess; 211 return ResultSuccess;
201} 212}
202 213
@@ -220,10 +231,8 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
220 command_generator.PostCommand(); 231 command_generator.PostCommand();
221 // Base sample size 232 // Base sample size
222 std::size_t BUFFER_SIZE{worker_params.sample_count}; 233 std::size_t BUFFER_SIZE{worker_params.sample_count};
223 // Samples 234 // Samples, making sure to clear
224 std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels()); 235 std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels(), 0);
225 // Make sure to clear our samples
226 std::memset(buffer.data(), 0, buffer.size() * sizeof(s16));
227 236
228 if (sink_context.InUse()) { 237 if (sink_context.InUse()) {
229 const auto stream_channel_count = stream->GetNumChannels(); 238 const auto stream_channel_count = stream->GetNumChannels();
@@ -231,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
231 const auto channel_count = buffer_offsets.size(); 240 const auto channel_count = buffer_offsets.size();
232 const auto& final_mix = mix_context.GetFinalMixInfo(); 241 const auto& final_mix = mix_context.GetFinalMixInfo();
233 const auto& in_params = final_mix.GetInParams(); 242 const auto& in_params = final_mix.GetInParams();
234 std::vector<s32*> mix_buffers(channel_count); 243 std::vector<std::span<s32>> mix_buffers(channel_count);
235 for (std::size_t i = 0; i < channel_count; i++) { 244 for (std::size_t i = 0; i < channel_count; i++) {
236 mix_buffers[i] = 245 mix_buffers[i] =
237 command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]); 246 command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]);
@@ -284,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
284 buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample); 293 buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample);
285 } else if (stream_channel_count == 2) { 294 } else if (stream_channel_count == 2) {
286 // Mix all channels into 2 channels 295 // Mix all channels into 2 channels
287 if (sink_context.HasDownMixingCoefficients()) { 296 const auto [left, right] = Mix6To2WithCoefficients(
288 const auto [left, right] = Mix6To2WithCoefficients( 297 fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample,
289 fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, 298 sink_context.GetDownmixCoefficients());
290 sink_context.GetDownmixCoefficients()); 299 buffer[i * stream_channel_count + 0] = left;
291 buffer[i * stream_channel_count + 0] = left; 300 buffer[i * stream_channel_count + 1] = right;
292 buffer[i * stream_channel_count + 1] = right;
293 } else {
294 const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample,
295 lf_sample, bl_sample, br_sample);
296 buffer[i * stream_channel_count + 0] = left;
297 buffer[i * stream_channel_count + 1] = right;
298 }
299 } else if (stream_channel_count == 6) { 301 } else if (stream_channel_count == 6) {
300 // Pass through 302 // Pass through
301 buffer[i * stream_channel_count + 0] = fl_sample; 303 buffer[i * stream_channel_count + 0] = fl_sample;
@@ -315,10 +317,24 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
315} 317}
316 318
317void AudioRenderer::ReleaseAndQueueBuffers() { 319void AudioRenderer::ReleaseAndQueueBuffers() {
318 const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)}; 320 if (!stream->IsPlaying()) {
319 for (const auto& tag : released_buffers) { 321 return;
320 QueueMixedBuffer(tag);
321 } 322 }
323
324 {
325 std::scoped_lock lock{mutex};
326 const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)};
327 for (const auto& tag : released_buffers) {
328 QueueMixedBuffer(tag);
329 }
330 }
331
332 const f32 sample_rate = static_cast<f32>(GetSampleRate());
333 const f32 sample_count = static_cast<f32>(GetSampleCount());
334 const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240));
335 const s32 ms = (1000 / static_cast<s32>(consume_rate)) - 1;
336 const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1));
337 core_timing.ScheduleEvent(next_event_time, process_event, {});
322} 338}
323 339
324} // namespace AudioCore 340} // namespace AudioCore
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 18567f618..88fdd13dd 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <mutex>
9#include <vector> 10#include <vector>
10 11
11#include "audio_core/behavior_info.h" 12#include "audio_core/behavior_info.h"
@@ -45,6 +46,8 @@ public:
45 46
46 [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params, 47 [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
47 std::vector<u8>& output_params); 48 std::vector<u8>& output_params);
49 [[nodiscard]] ResultCode Start();
50 [[nodiscard]] ResultCode Stop();
48 void QueueMixedBuffer(Buffer::Tag tag); 51 void QueueMixedBuffer(Buffer::Tag tag);
49 void ReleaseAndQueueBuffers(); 52 void ReleaseAndQueueBuffers();
50 [[nodiscard]] u32 GetSampleRate() const; 53 [[nodiscard]] u32 GetSampleRate() const;
@@ -68,6 +71,9 @@ private:
68 Core::Memory::Memory& memory; 71 Core::Memory::Memory& memory;
69 CommandGenerator command_generator; 72 CommandGenerator command_generator;
70 std::size_t elapsed_frame_count{}; 73 std::size_t elapsed_frame_count{};
74 Core::Timing::CoreTiming& core_timing;
75 std::shared_ptr<Core::Timing::EventType> process_event;
76 std::mutex mutex;
71}; 77};
72 78
73} // namespace AudioCore 79} // namespace AudioCore
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 437cc5ccd..b99d0fc91 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
31 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f}; 31 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
32 32
33template <std::size_t N> 33template <std::size_t N>
34void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { 34void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) {
35 for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) { 35 for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) {
36 for (std::size_t j = 0; j < N; j++) { 36 for (std::size_t j = 0; j < N; j++) {
37 output[i + j] += 37 output[i + j] +=
@@ -40,7 +40,8 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
40 } 40 }
41} 41}
42 42
43s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) { 43s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta,
44 s32 sample_count) {
44 s32 x = 0; 45 s32 x = 0;
45 for (s32 i = 0; i < sample_count; i++) { 46 for (s32 i = 0; i < sample_count; i++) {
46 x = static_cast<s32>(static_cast<float>(input[i]) * gain); 47 x = static_cast<s32>(static_cast<float>(input[i]) * gain);
@@ -50,20 +51,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam
50 return x; 51 return x;
51} 52}
52 53
53void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) { 54void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta,
55 s32 sample_count) {
54 for (s32 i = 0; i < sample_count; i++) { 56 for (s32 i = 0; i < sample_count; i++) {
55 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); 57 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
56 gain += delta; 58 gain += delta;
57 } 59 }
58} 60}
59 61
60void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) { 62void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain,
63 s32 sample_count) {
61 for (s32 i = 0; i < sample_count; i++) { 64 for (s32 i = 0; i < sample_count; i++) {
62 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); 65 output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15);
63 } 66 }
64} 67}
65 68
66s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) { 69s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) {
67 const bool positive = first_sample > 0; 70 const bool positive = first_sample > 0;
68 auto final_sample = std::abs(first_sample); 71 auto final_sample = std::abs(first_sample);
69 for (s32 i = 0; i < sample_count; i++) { 72 for (s32 i = 0; i < sample_count; i++) {
@@ -128,10 +131,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1,
128 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; 131 1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
129 132
130template <std::size_t CHANNEL_COUNT> 133template <std::size_t CHANNEL_COUNT>
131void ApplyReverbGeneric(I3dl2ReverbState& state, 134void ApplyReverbGeneric(
132 const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input, 135 I3dl2ReverbState& state,
133 const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output, 136 const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input,
134 s32 sample_count) { 137 const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) {
135 138
136 auto GetTapLookup = []() { 139 auto GetTapLookup = []() {
137 if constexpr (CHANNEL_COUNT == 1) { 140 if constexpr (CHANNEL_COUNT == 1) {
@@ -400,7 +403,10 @@ void CommandGenerator::GenerateDataSourceCommand(ServerVoiceInfo& voice_info, Vo
400 } 403 }
401 } else { 404 } else {
402 switch (in_params.sample_format) { 405 switch (in_params.sample_format) {
406 case SampleFormat::Pcm8:
403 case SampleFormat::Pcm16: 407 case SampleFormat::Pcm16:
408 case SampleFormat::Pcm32:
409 case SampleFormat::PcmFloat:
404 DecodeFromWaveBuffers(voice_info, GetChannelMixBuffer(channel), dsp_state, channel, 410 DecodeFromWaveBuffers(voice_info, GetChannelMixBuffer(channel), dsp_state, channel,
405 worker_params.sample_rate, worker_params.sample_count, 411 worker_params.sample_rate, worker_params.sample_count,
406 in_params.node_id); 412 in_params.node_id);
@@ -454,8 +460,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff
454 "input_mix_buffer={}, output_mix_buffer={}", 460 "input_mix_buffer={}, output_mix_buffer={}",
455 node_id, input_offset, output_offset); 461 node_id, input_offset, output_offset);
456 } 462 }
457 const auto* input = GetMixBuffer(input_offset); 463 std::span<const s32> input = GetMixBuffer(input_offset);
458 auto* output = GetMixBuffer(output_offset); 464 std::span<s32> output = GetMixBuffer(output_offset);
459 465
460 // Biquad filter parameters 466 // Biquad filter parameters
461 const auto [n0, n1, n2] = params.numerator; 467 const auto [n0, n1, n2] = params.numerator;
@@ -548,8 +554,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
548 return; 554 return;
549 } 555 }
550 556
551 std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{}; 557 std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{};
552 std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{}; 558 std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{};
553 559
554 const auto status = params.status; 560 const auto status = params.status;
555 for (s32 i = 0; i < channel_count; i++) { 561 for (s32 i = 0; i < channel_count; i++) {
@@ -584,7 +590,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E
584 for (s32 i = 0; i < channel_count; i++) { 590 for (s32 i = 0; i < channel_count; i++) {
585 // Only copy if the buffer input and output do not match! 591 // Only copy if the buffer input and output do not match!
586 if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) { 592 if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
587 std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32)); 593 std::memcpy(output[i].data(), input[i].data(),
594 worker_params.sample_count * sizeof(s32));
588 } 595 }
589 } 596 }
590 } 597 }
@@ -600,8 +607,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset,
600 for (s32 i = 0; i < channel_count; i++) { 607 for (s32 i = 0; i < channel_count; i++) {
601 // TODO(ogniK): Actually implement biquad filter 608 // TODO(ogniK): Actually implement biquad filter
602 if (params.input[i] != params.output[i]) { 609 if (params.input[i] != params.output[i]) {
603 const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); 610 std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]);
604 auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); 611 std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]);
605 ApplyMix<1>(output, input, 32768, worker_params.sample_count); 612 ApplyMix<1>(output, input, 32768, worker_params.sample_count);
606 } 613 }
607 } 614 }
@@ -640,14 +647,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf
640 647
641 if (samples_read != static_cast<int>(worker_params.sample_count) && 648 if (samples_read != static_cast<int>(worker_params.sample_count) &&
642 samples_read <= params.sample_count) { 649 samples_read <= params.sample_count) {
643 std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read); 650 std::memset(GetMixBuffer(output_index).data(), 0,
651 params.sample_count - samples_read);
644 } 652 }
645 } else { 653 } else {
646 AuxInfoDSP empty{}; 654 AuxInfoDSP empty{};
647 memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP)); 655 memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP));
648 memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP)); 656 memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP));
649 if (output_index != input_index) { 657 if (output_index != input_index) {
650 std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index), 658 std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(),
651 worker_params.sample_count * sizeof(s32)); 659 worker_params.sample_count * sizeof(s32));
652 } 660 }
653 } 661 }
@@ -665,7 +673,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter
665} 673}
666 674
667s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, 675s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
668 const s32* data, u32 sample_count, u32 write_offset, 676 std::span<const s32> data, u32 sample_count, u32 write_offset,
669 u32 write_count) { 677 u32 write_count) {
670 if (max_samples == 0) { 678 if (max_samples == 0) {
671 return 0; 679 return 0;
@@ -675,14 +683,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
675 return 0; 683 return 0;
676 } 684 }
677 685
678 std::size_t data_offset{}; 686 s32 data_offset{};
679 u32 remaining = sample_count; 687 u32 remaining = sample_count;
680 while (remaining > 0) { 688 while (remaining > 0) {
681 // Get position in buffer 689 // Get position in buffer
682 const auto base = send_buffer + (offset * sizeof(u32)); 690 const auto base = send_buffer + (offset * sizeof(u32));
683 const auto samples_to_grab = std::min(max_samples - offset, remaining); 691 const auto samples_to_grab = std::min(max_samples - offset, remaining);
684 // Write to output 692 // Write to output
685 memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32)); 693 memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32));
686 offset = (offset + samples_to_grab) % max_samples; 694 offset = (offset + samples_to_grab) % max_samples;
687 remaining -= samples_to_grab; 695 remaining -= samples_to_grab;
688 data_offset += samples_to_grab; 696 data_offset += samples_to_grab;
@@ -695,7 +703,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3
695} 703}
696 704
697s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, 705s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
698 s32* out_data, u32 sample_count, u32 read_offset, 706 std::span<s32> out_data, u32 sample_count, u32 read_offset,
699 u32 read_count) { 707 u32 read_count) {
700 if (max_samples == 0) { 708 if (max_samples == 0) {
701 return 0; 709 return 0;
@@ -707,15 +715,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
707 } 715 }
708 716
709 u32 remaining = sample_count; 717 u32 remaining = sample_count;
718 s32 data_offset{};
710 while (remaining > 0) { 719 while (remaining > 0) {
711 const auto base = recv_buffer + (offset * sizeof(u32)); 720 const auto base = recv_buffer + (offset * sizeof(u32));
712 const auto samples_to_grab = std::min(max_samples - offset, remaining); 721 const auto samples_to_grab = std::min(max_samples - offset, remaining);
713 std::vector<s32> buffer(samples_to_grab); 722 std::vector<s32> buffer(samples_to_grab);
714 memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32)); 723 memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32));
715 std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32)); 724 std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32));
716 out_data += samples_to_grab;
717 offset = (offset + samples_to_grab) % max_samples; 725 offset = (offset + samples_to_grab) % max_samples;
718 remaining -= samples_to_grab; 726 remaining -= samples_to_grab;
727 data_offset += samples_to_grab;
719 } 728 }
720 729
721 if (read_count != 0) { 730 if (read_count != 0) {
@@ -795,7 +804,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
795 state.lowpass_1 = 0.0f; 804 state.lowpass_1 = 0.0f;
796 } else { 805 } else {
797 const auto a = 1.0f - hf_gain; 806 const auto a = 1.0f - hf_gain;
798 const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference / 807 const auto b = 2.0f * (2.0f - hf_gain * CosD(256.0f * info.hf_reference /
799 static_cast<f32>(info.sample_rate))); 808 static_cast<f32>(info.sample_rate)));
800 const auto c = std::sqrt(b * b - 4.0f * a * a); 809 const auto c = std::sqrt(b * b - 4.0f * a * a);
801 810
@@ -843,7 +852,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta
843 } 852 }
844 853
845 const auto max_early_delay = state.early_delay_line.GetMaxDelay(); 854 const auto max_early_delay = state.early_delay_line.GetMaxDelay();
846 const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f); 855 const auto reflection_time = 1000.0f * (0.9998f * info.reverb_delay + 0.02f);
847 for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) { 856 for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
848 const auto length = AudioCommon::CalculateDelaySamples( 857 const auto length = AudioCommon::CalculateDelaySamples(
849 sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]); 858 sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
@@ -962,8 +971,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t
962 node_id, input_offset, output_offset, volume); 971 node_id, input_offset, output_offset, volume);
963 } 972 }
964 973
965 auto* output = GetMixBuffer(output_offset); 974 std::span<s32> output = GetMixBuffer(output_offset);
966 const auto* input = GetMixBuffer(input_offset); 975 std::span<const s32> input = GetMixBuffer(input_offset);
967 976
968 const s32 gain = static_cast<s32>(volume * 32768.0f); 977 const s32 gain = static_cast<s32>(volume * 32768.0f);
969 // Mix with loop unrolling 978 // Mix with loop unrolling
@@ -1003,8 +1012,10 @@ void CommandGenerator::GenerateFinalMixCommand() {
1003 } 1012 }
1004} 1013}
1005 1014
1006s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, 1015template <typename T>
1007 s32 sample_count, s32 channel, std::size_t mix_offset) { 1016s32 CommandGenerator::DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
1017 s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
1018 s32 channel, std::size_t mix_offset) {
1008 const auto& in_params = voice_info.GetInParams(); 1019 const auto& in_params = voice_info.GetInParams();
1009 const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; 1020 const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
1010 if (wave_buffer.buffer_address == 0) { 1021 if (wave_buffer.buffer_address == 0) {
@@ -1013,39 +1024,50 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s
1013 if (wave_buffer.buffer_size == 0) { 1024 if (wave_buffer.buffer_size == 0) {
1014 return 0; 1025 return 0;
1015 } 1026 }
1016 if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) { 1027 if (sample_end_offset < sample_start_offset) {
1017 return 0; 1028 return 0;
1018 } 1029 }
1019 const auto samples_remaining = 1030 const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
1020 (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
1021 const auto start_offset = 1031 const auto start_offset =
1022 ((wave_buffer.start_sample_offset + dsp_state.offset) * in_params.channel_count) * 1032 ((dsp_state.offset + sample_start_offset) * in_params.channel_count) * sizeof(T);
1023 sizeof(s16);
1024 const auto buffer_pos = wave_buffer.buffer_address + start_offset; 1033 const auto buffer_pos = wave_buffer.buffer_address + start_offset;
1025 const auto samples_processed = std::min(sample_count, samples_remaining); 1034 const auto samples_processed = std::min(sample_count, samples_remaining);
1026 1035
1027 if (in_params.channel_count == 1) { 1036 const auto channel_count = in_params.channel_count;
1028 std::vector<s16> buffer(samples_processed); 1037 std::vector<T> buffer(samples_processed * channel_count);
1029 memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16)); 1038 memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(T));
1030 for (std::size_t i = 0; i < buffer.size(); i++) {
1031 sample_buffer[mix_offset + i] = buffer[i];
1032 }
1033 } else {
1034 const auto channel_count = in_params.channel_count;
1035 std::vector<s16> buffer(samples_processed * channel_count);
1036 memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16));
1037 1039
1040 if constexpr (std::is_floating_point_v<T>) {
1041 for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
1042 sample_buffer[mix_offset + i] = static_cast<s32>(buffer[i * channel_count + channel] *
1043 std::numeric_limits<s16>::max());
1044 }
1045 } else if constexpr (sizeof(T) == 1) {
1046 for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
1047 sample_buffer[mix_offset + i] =
1048 static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] /
1049 std::numeric_limits<s8>::max()) *
1050 std::numeric_limits<s16>::max());
1051 }
1052 } else if constexpr (sizeof(T) == 2) {
1038 for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) { 1053 for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
1039 sample_buffer[mix_offset + i] = buffer[i * channel_count + channel]; 1054 sample_buffer[mix_offset + i] = buffer[i * channel_count + channel];
1040 } 1055 }
1056 } else {
1057 for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) {
1058 sample_buffer[mix_offset + i] =
1059 static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] /
1060 std::numeric_limits<s32>::max()) *
1061 std::numeric_limits<s16>::max());
1062 }
1041 } 1063 }
1042 1064
1043 return samples_processed; 1065 return samples_processed;
1044} 1066}
1045 1067
1046s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, 1068s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state,
1047 s32 sample_count, [[maybe_unused]] s32 channel, 1069 s32 sample_start_offset, s32 sample_end_offset, s32 sample_count,
1048 std::size_t mix_offset) { 1070 [[maybe_unused]] s32 channel, std::size_t mix_offset) {
1049 const auto& in_params = voice_info.GetInParams(); 1071 const auto& in_params = voice_info.GetInParams();
1050 const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; 1072 const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
1051 if (wave_buffer.buffer_address == 0) { 1073 if (wave_buffer.buffer_address == 0) {
@@ -1054,7 +1076,7 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
1054 if (wave_buffer.buffer_size == 0) { 1076 if (wave_buffer.buffer_size == 0) {
1055 return 0; 1077 return 0;
1056 } 1078 }
1057 if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) { 1079 if (sample_end_offset < sample_start_offset) {
1058 return 0; 1080 return 0;
1059 } 1081 }
1060 1082
@@ -1079,10 +1101,9 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
1079 s32 coef1 = coeffs[idx * 2]; 1101 s32 coef1 = coeffs[idx * 2];
1080 s32 coef2 = coeffs[idx * 2 + 1]; 1102 s32 coef2 = coeffs[idx * 2 + 1];
1081 1103
1082 const auto samples_remaining = 1104 const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset;
1083 (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset;
1084 const auto samples_processed = std::min(sample_count, samples_remaining); 1105 const auto samples_processed = std::min(sample_count, samples_remaining);
1085 const auto sample_pos = wave_buffer.start_sample_offset + dsp_state.offset; 1106 const auto sample_pos = dsp_state.offset + sample_start_offset;
1086 1107
1087 const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME; 1108 const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME;
1088 auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) + 1109 auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) +
@@ -1157,12 +1178,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s
1157 return samples_processed; 1178 return samples_processed;
1158} 1179}
1159 1180
1160s32* CommandGenerator::GetMixBuffer(std::size_t index) { 1181std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) {
1161 return mix_buffer.data() + (index * worker_params.sample_count); 1182 return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count),
1183 worker_params.sample_count);
1162} 1184}
1163 1185
1164const s32* CommandGenerator::GetMixBuffer(std::size_t index) const { 1186std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const {
1165 return mix_buffer.data() + (index * worker_params.sample_count); 1187 return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count),
1188 worker_params.sample_count);
1166} 1189}
1167 1190
1168std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const { 1191std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const {
@@ -1173,15 +1196,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const {
1173 return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT; 1196 return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT;
1174} 1197}
1175 1198
1176s32* CommandGenerator::GetChannelMixBuffer(s32 channel) { 1199std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) {
1177 return GetMixBuffer(worker_params.mix_buffer_count + channel); 1200 return GetMixBuffer(worker_params.mix_buffer_count + channel);
1178} 1201}
1179 1202
1180const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const { 1203std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const {
1181 return GetMixBuffer(worker_params.mix_buffer_count + channel); 1204 return GetMixBuffer(worker_params.mix_buffer_count + channel);
1182} 1205}
1183 1206
1184void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, 1207void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
1185 VoiceState& dsp_state, s32 channel, 1208 VoiceState& dsp_state, s32 channel,
1186 s32 target_sample_rate, s32 sample_count, 1209 s32 target_sample_rate, s32 sample_count,
1187 s32 node_id) { 1210 s32 node_id) {
@@ -1193,7 +1216,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1193 node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate, 1216 node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate,
1194 in_params.mix_id, in_params.splitter_info_id); 1217 in_params.mix_id, in_params.splitter_info_id);
1195 } 1218 }
1196 ASSERT_OR_EXECUTE(output != nullptr, { return; }); 1219 ASSERT_OR_EXECUTE(output.data() != nullptr, { return; });
1197 1220
1198 const auto resample_rate = static_cast<s32>( 1221 const auto resample_rate = static_cast<s32>(
1199 static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) * 1222 static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) *
@@ -1210,9 +1233,9 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1210 } 1233 }
1211 1234
1212 std::size_t temp_mix_offset{}; 1235 std::size_t temp_mix_offset{};
1213 bool is_buffer_completed{false}; 1236 s32 samples_output{};
1214 auto samples_remaining = sample_count; 1237 auto samples_remaining = sample_count;
1215 while (samples_remaining > 0 && !is_buffer_completed) { 1238 while (samples_remaining > 0) {
1216 const auto samples_to_output = std::min(samples_remaining, min_required_samples); 1239 const auto samples_to_output = std::min(samples_remaining, min_required_samples);
1217 const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15; 1240 const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15;
1218 1241
@@ -1229,24 +1252,53 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1229 const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; 1252 const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index];
1230 // No more data can be read 1253 // No more data can be read
1231 if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) { 1254 if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) {
1232 is_buffer_completed = true;
1233 break; 1255 break;
1234 } 1256 }
1235 1257
1236 if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 && 1258 if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 &&
1237 wave_buffer.context_address != 0 && wave_buffer.context_size != 0) { 1259 wave_buffer.context_address != 0 && wave_buffer.context_size != 0) {
1238 // TODO(ogniK): ADPCM loop context 1260 memory.ReadBlock(wave_buffer.context_address, &dsp_state.context,
1261 sizeof(ADPCMContext));
1262 }
1263
1264 s32 samples_offset_start;
1265 s32 samples_offset_end;
1266 if (dsp_state.loop_count > 0 && wave_buffer.loop_start_sample != 0 &&
1267 wave_buffer.loop_end_sample != 0 &&
1268 wave_buffer.loop_start_sample <= wave_buffer.loop_end_sample) {
1269 samples_offset_start = wave_buffer.loop_start_sample;
1270 samples_offset_end = wave_buffer.loop_end_sample;
1271 } else {
1272 samples_offset_start = wave_buffer.start_sample_offset;
1273 samples_offset_end = wave_buffer.end_sample_offset;
1239 } 1274 }
1240 1275
1241 s32 samples_decoded{0}; 1276 s32 samples_decoded{0};
1242 switch (in_params.sample_format) { 1277 switch (in_params.sample_format) {
1278 case SampleFormat::Pcm8:
1279 samples_decoded =
1280 DecodePcm<s8>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
1281 samples_to_read - samples_read, channel, temp_mix_offset);
1282 break;
1243 case SampleFormat::Pcm16: 1283 case SampleFormat::Pcm16:
1244 samples_decoded = DecodePcm16(voice_info, dsp_state, samples_to_read - samples_read, 1284 samples_decoded =
1245 channel, temp_mix_offset); 1285 DecodePcm<s16>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
1286 samples_to_read - samples_read, channel, temp_mix_offset);
1287 break;
1288 case SampleFormat::Pcm32:
1289 samples_decoded =
1290 DecodePcm<s32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
1291 samples_to_read - samples_read, channel, temp_mix_offset);
1292 break;
1293 case SampleFormat::PcmFloat:
1294 samples_decoded =
1295 DecodePcm<f32>(voice_info, dsp_state, samples_offset_start, samples_offset_end,
1296 samples_to_read - samples_read, channel, temp_mix_offset);
1246 break; 1297 break;
1247 case SampleFormat::Adpcm: 1298 case SampleFormat::Adpcm:
1248 samples_decoded = DecodeAdpcm(voice_info, dsp_state, samples_to_read - samples_read, 1299 samples_decoded =
1249 channel, temp_mix_offset); 1300 DecodeAdpcm(voice_info, dsp_state, samples_offset_start, samples_offset_end,
1301 samples_to_read - samples_read, channel, temp_mix_offset);
1250 break; 1302 break;
1251 default: 1303 default:
1252 UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format); 1304 UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format);
@@ -1257,15 +1309,19 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1257 dsp_state.offset += samples_decoded; 1309 dsp_state.offset += samples_decoded;
1258 dsp_state.played_sample_count += samples_decoded; 1310 dsp_state.played_sample_count += samples_decoded;
1259 1311
1260 if (dsp_state.offset >= 1312 if (dsp_state.offset >= (samples_offset_end - samples_offset_start) ||
1261 (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) ||
1262 samples_decoded == 0) { 1313 samples_decoded == 0) {
1263 // Reset our sample offset 1314 // Reset our sample offset
1264 dsp_state.offset = 0; 1315 dsp_state.offset = 0;
1265 if (wave_buffer.is_looping) { 1316 if (wave_buffer.is_looping) {
1266 if (samples_decoded == 0) { 1317 dsp_state.loop_count++;
1318 if (wave_buffer.loop_count > 0 &&
1319 (dsp_state.loop_count > wave_buffer.loop_count || samples_decoded == 0)) {
1267 // End of our buffer 1320 // End of our buffer
1268 is_buffer_completed = true; 1321 voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
1322 }
1323
1324 if (samples_decoded == 0) {
1269 break; 1325 break;
1270 } 1326 }
1271 1327
@@ -1273,35 +1329,29 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o
1273 dsp_state.played_sample_count = 0; 1329 dsp_state.played_sample_count = 0;
1274 } 1330 }
1275 } else { 1331 } else {
1276
1277 // Update our wave buffer states 1332 // Update our wave buffer states
1278 dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false; 1333 voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer);
1279 dsp_state.wave_buffer_consumed++;
1280 dsp_state.wave_buffer_index =
1281 (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
1282 if (wave_buffer.end_of_stream) {
1283 dsp_state.played_sample_count = 0;
1284 }
1285 } 1334 }
1286 } 1335 }
1287 } 1336 }
1288 1337
1289 if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) { 1338 if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) {
1290 // No need to resample 1339 // No need to resample
1291 std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32)); 1340 std::memcpy(output.data() + samples_output, sample_buffer.data(),
1341 samples_read * sizeof(s32));
1292 } else { 1342 } else {
1293 std::fill(sample_buffer.begin() + temp_mix_offset, 1343 std::fill(sample_buffer.begin() + temp_mix_offset,
1294 sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read), 1344 sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read),
1295 0); 1345 0);
1296 AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction, 1346 AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate,
1297 samples_to_output); 1347 dsp_state.fraction, samples_to_output);
1298 // Resample 1348 // Resample
1299 for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) { 1349 for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) {
1300 dsp_state.sample_history[i] = sample_buffer[samples_to_read + i]; 1350 dsp_state.sample_history[i] = sample_buffer[samples_to_read + i];
1301 } 1351 }
1302 } 1352 }
1303 output += samples_to_output;
1304 samples_remaining -= samples_to_output; 1353 samples_remaining -= samples_to_output;
1354 samples_output += samples_to_output;
1305 } 1355 }
1306} 1356}
1307 1357
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index 2ebb755b0..59a33ba76 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <span>
8#include "audio_core/common.h" 9#include "audio_core/common.h"
9#include "audio_core/voice_context.h" 10#include "audio_core/voice_context.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -41,10 +42,10 @@ public:
41 void PreCommand(); 42 void PreCommand();
42 void PostCommand(); 43 void PostCommand();
43 44
44 [[nodiscard]] s32* GetChannelMixBuffer(s32 channel); 45 [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel);
45 [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const; 46 [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const;
46 [[nodiscard]] s32* GetMixBuffer(std::size_t index); 47 [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index);
47 [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const; 48 [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const;
48 [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const; 49 [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const;
49 50
50 [[nodiscard]] std::size_t GetTotalMixBufferCount() const; 51 [[nodiscard]] std::size_t GetTotalMixBufferCount() const;
@@ -77,21 +78,24 @@ private:
77 void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled); 78 void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled);
78 [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index); 79 [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index);
79 80
80 s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data, 81 s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples,
81 u32 sample_count, u32 write_offset, u32 write_count); 82 std::span<const s32> data, u32 sample_count, u32 write_offset,
82 s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, 83 u32 write_count);
83 u32 sample_count, u32 read_offset, u32 read_count); 84 s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples,
85 std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count);
84 86
85 void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, 87 void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
86 std::vector<u8>& work_buffer); 88 std::vector<u8>& work_buffer);
87 void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear); 89 void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
88 // DSP Code 90 // DSP Code
89 s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, 91 template <typename T>
90 s32 channel, std::size_t mix_offset); 92 s32 DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
91 s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, 93 s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
92 s32 channel, std::size_t mix_offset); 94 s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset,
93 void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state, 95 s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset);
94 s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id); 96 void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output,
97 VoiceState& dsp_state, s32 channel, s32 target_sample_rate,
98 s32 sample_count, s32 node_id);
95 99
96 AudioCommon::AudioRendererParameter& worker_params; 100 AudioCommon::AudioRendererParameter& worker_params;
97 VoiceContext& voice_context; 101 VoiceContext& voice_context;
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index fe546c55d..1ab537588 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -15,7 +15,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
15constexpr ResultCode ERR_SPLITTER_SORT_FAILED{ErrorModule::Audio, 43}; 15constexpr ResultCode ERR_SPLITTER_SORT_FAILED{ErrorModule::Audio, 43};
16} // namespace Audren 16} // namespace Audren
17 17
18constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8'); 18constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '9');
19constexpr std::size_t MAX_MIX_BUFFERS = 24; 19constexpr std::size_t MAX_MIX_BUFFERS = 24;
20constexpr std::size_t MAX_BIQUAD_FILTERS = 2; 20constexpr std::size_t MAX_BIQUAD_FILTERS = 2;
21constexpr std::size_t MAX_CHANNEL_COUNT = 6; 21constexpr std::size_t MAX_CHANNEL_COUNT = 6;
diff --git a/src/audio_core/info_updater.cpp b/src/audio_core/info_updater.cpp
index 4a5b1b4ab..9b4ca1851 100644
--- a/src/audio_core/info_updater.cpp
+++ b/src/audio_core/info_updater.cpp
@@ -189,9 +189,6 @@ bool InfoUpdater::UpdateVoices(VoiceContext& voice_context,
189 if (voice_in_params.is_new) { 189 if (voice_in_params.is_new) {
190 // Default our values for our voice 190 // Default our values for our voice
191 voice_info.Initialize(); 191 voice_info.Initialize();
192 if (channel_count == 0 || channel_count > AudioCommon::MAX_CHANNEL_COUNT) {
193 continue;
194 }
195 192
196 // Zero out our voice states 193 // Zero out our voice states
197 for (std::size_t channel = 0; channel < channel_count; channel++) { 194 for (std::size_t channel = 0; channel < channel_count; channel++) {
diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp
index a69543696..cc55b290c 100644
--- a/src/audio_core/sink_context.cpp
+++ b/src/audio_core/sink_context.cpp
@@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const {
15void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) { 15void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) {
16 ASSERT(in.type == SinkTypes::Device); 16 ASSERT(in.type == SinkTypes::Device);
17 17
18 has_downmix_coefs = in.device.down_matrix_enabled; 18 if (in.device.down_matrix_enabled) {
19 if (has_downmix_coefs) {
20 downmix_coefficients = in.device.down_matrix_coef; 19 downmix_coefficients = in.device.down_matrix_coef;
20 } else {
21 downmix_coefficients = {
22 1.0f, // front
23 0.707f, // center
24 0.0f, // lfe
25 0.707f, // back
26 };
21 } 27 }
28
22 in_use = in.in_use; 29 in_use = in.in_use;
23 use_count = in.device.input_count; 30 use_count = in.device.input_count;
24 buffers = in.device.input; 31 buffers = in.device.input;
@@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const {
34 return buffer_ret; 41 return buffer_ret;
35} 42}
36 43
37bool SinkContext::HasDownMixingCoefficients() const {
38 return has_downmix_coefs;
39}
40
41const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const { 44const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const {
42 return downmix_coefficients; 45 return downmix_coefficients;
43} 46}
diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h
index 9e2b69785..254961fe2 100644
--- a/src/audio_core/sink_context.h
+++ b/src/audio_core/sink_context.h
@@ -84,7 +84,6 @@ public:
84 [[nodiscard]] bool InUse() const; 84 [[nodiscard]] bool InUse() const;
85 [[nodiscard]] std::vector<u8> OutputBuffers() const; 85 [[nodiscard]] std::vector<u8> OutputBuffers() const;
86 86
87 [[nodiscard]] bool HasDownMixingCoefficients() const;
88 [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const; 87 [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const;
89 88
90private: 89private:
@@ -92,7 +91,6 @@ private:
92 s32 use_count{}; 91 s32 use_count{};
93 std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{}; 92 std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{};
94 std::size_t sink_count{}; 93 std::size_t sink_count{};
95 bool has_downmix_coefs{false};
96 DownmixCoefficients downmix_coefficients{}; 94 DownmixCoefficients downmix_coefficients{};
97}; 95};
98} // namespace AudioCore 96} // namespace AudioCore
diff --git a/src/audio_core/voice_context.cpp b/src/audio_core/voice_context.cpp
index 867b8fc6b..d8c954b60 100644
--- a/src/audio_core/voice_context.cpp
+++ b/src/audio_core/voice_context.cpp
@@ -66,7 +66,7 @@ void ServerVoiceInfo::Initialize() {
66 in_params.last_volume = 0.0f; 66 in_params.last_volume = 0.0f;
67 in_params.biquad_filter.fill({}); 67 in_params.biquad_filter.fill({});
68 in_params.wave_buffer_count = 0; 68 in_params.wave_buffer_count = 0;
69 in_params.wave_bufffer_head = 0; 69 in_params.wave_buffer_head = 0;
70 in_params.mix_id = AudioCommon::NO_MIX; 70 in_params.mix_id = AudioCommon::NO_MIX;
71 in_params.splitter_info_id = AudioCommon::NO_SPLITTER; 71 in_params.splitter_info_id = AudioCommon::NO_SPLITTER;
72 in_params.additional_params_address = 0; 72 in_params.additional_params_address = 0;
@@ -75,7 +75,7 @@ void ServerVoiceInfo::Initialize() {
75 out_params.played_sample_count = 0; 75 out_params.played_sample_count = 0;
76 out_params.wave_buffer_consumed = 0; 76 out_params.wave_buffer_consumed = 0;
77 in_params.voice_drop_flag = false; 77 in_params.voice_drop_flag = false;
78 in_params.buffer_mapped = false; 78 in_params.buffer_mapped = true;
79 in_params.wave_buffer_flush_request_count = 0; 79 in_params.wave_buffer_flush_request_count = 0;
80 in_params.was_biquad_filter_enabled.fill(false); 80 in_params.was_biquad_filter_enabled.fill(false);
81 81
@@ -126,7 +126,7 @@ void ServerVoiceInfo::UpdateParameters(const VoiceInfo::InParams& voice_in,
126 in_params.volume = voice_in.volume; 126 in_params.volume = voice_in.volume;
127 in_params.biquad_filter = voice_in.biquad_filter; 127 in_params.biquad_filter = voice_in.biquad_filter;
128 in_params.wave_buffer_count = voice_in.wave_buffer_count; 128 in_params.wave_buffer_count = voice_in.wave_buffer_count;
129 in_params.wave_bufffer_head = voice_in.wave_buffer_head; 129 in_params.wave_buffer_head = voice_in.wave_buffer_head;
130 if (behavior_info.IsFlushVoiceWaveBuffersSupported()) { 130 if (behavior_info.IsFlushVoiceWaveBuffersSupported()) {
131 const auto in_request_count = in_params.wave_buffer_flush_request_count; 131 const auto in_request_count = in_params.wave_buffer_flush_request_count;
132 const auto voice_request_count = voice_in.wave_buffer_flush_request_count; 132 const auto voice_request_count = voice_in.wave_buffer_flush_request_count;
@@ -185,14 +185,16 @@ void ServerVoiceInfo::UpdateWaveBuffers(
185 wave_buffer.buffer_size = 0; 185 wave_buffer.buffer_size = 0;
186 wave_buffer.context_address = 0; 186 wave_buffer.context_address = 0;
187 wave_buffer.context_size = 0; 187 wave_buffer.context_size = 0;
188 wave_buffer.loop_start_sample = 0;
189 wave_buffer.loop_end_sample = 0;
188 wave_buffer.sent_to_dsp = true; 190 wave_buffer.sent_to_dsp = true;
189 } 191 }
190 192
191 // Mark all our wave buffers as invalid 193 // Mark all our wave buffers as invalid
192 for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count); 194 for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count);
193 channel++) { 195 channel++) {
194 for (auto& is_valid : voice_states[channel]->is_wave_buffer_valid) { 196 for (std::size_t i = 0; i < AudioCommon::MAX_WAVE_BUFFERS; ++i) {
195 is_valid = false; 197 voice_states[channel]->is_wave_buffer_valid[i] = false;
196 } 198 }
197 } 199 }
198 } 200 }
@@ -211,7 +213,7 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
211 const WaveBuffer& in_wave_buffer, SampleFormat sample_format, 213 const WaveBuffer& in_wave_buffer, SampleFormat sample_format,
212 bool is_buffer_valid, 214 bool is_buffer_valid,
213 [[maybe_unused]] BehaviorInfo& behavior_info) { 215 [[maybe_unused]] BehaviorInfo& behavior_info) {
214 if (!is_buffer_valid && out_wavebuffer.sent_to_dsp) { 216 if (!is_buffer_valid && out_wavebuffer.sent_to_dsp && out_wavebuffer.buffer_address != 0) {
215 out_wavebuffer.buffer_address = 0; 217 out_wavebuffer.buffer_address = 0;
216 out_wavebuffer.buffer_size = 0; 218 out_wavebuffer.buffer_size = 0;
217 } 219 }
@@ -219,11 +221,40 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
219 if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) { 221 if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) {
220 // Validate sample offset sizings 222 // Validate sample offset sizings
221 if (sample_format == SampleFormat::Pcm16) { 223 if (sample_format == SampleFormat::Pcm16) {
222 const auto buffer_size = in_wave_buffer.buffer_size; 224 const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
223 if (in_wave_buffer.start_sample_offset < 0 || in_wave_buffer.end_sample_offset < 0 || 225 const s64 start = sizeof(s16) * in_wave_buffer.start_sample_offset;
224 (buffer_size < (sizeof(s16) * in_wave_buffer.start_sample_offset)) || 226 const s64 end = sizeof(s16) * in_wave_buffer.end_sample_offset;
225 (buffer_size < (sizeof(s16) * in_wave_buffer.end_sample_offset))) { 227 if (0 > start || start > buffer_size || 0 > end || end > buffer_size) {
226 // TODO(ogniK): Write error info 228 // TODO(ogniK): Write error info
229 LOG_ERROR(Audio,
230 "PCM16 wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
231 "offsets were "
232 "{:08X} - 0x{:08X}",
233 buffer_size, sizeof(s16) * in_wave_buffer.start_sample_offset,
234 sizeof(s16) * in_wave_buffer.end_sample_offset);
235 return;
236 }
237 } else if (sample_format == SampleFormat::Adpcm) {
238 const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size);
239 const s64 start_frames = in_wave_buffer.start_sample_offset / 14;
240 const s64 start_extra = in_wave_buffer.start_sample_offset % 14 == 0
241 ? 0
242 : (in_wave_buffer.start_sample_offset % 14) / 2 + 1 +
243 (in_wave_buffer.start_sample_offset % 2);
244 const s64 start = start_frames * 8 + start_extra;
245 const s64 end_frames = in_wave_buffer.end_sample_offset / 14;
246 const s64 end_extra = in_wave_buffer.end_sample_offset % 14 == 0
247 ? 0
248 : (in_wave_buffer.end_sample_offset % 14) / 2 + 1 +
249 (in_wave_buffer.end_sample_offset % 2);
250 const s64 end = end_frames * 8 + end_extra;
251 if (in_wave_buffer.start_sample_offset < 0 || start > buffer_size ||
252 in_wave_buffer.end_sample_offset < 0 || end > buffer_size) {
253 LOG_ERROR(Audio,
254 "ADPMC wavebuffer has an invalid size. Buffer has size 0x{:08X}, but "
255 "offsets were "
256 "{:08X} - 0x{:08X}",
257 in_wave_buffer.buffer_size, start, end);
227 return; 258 return;
228 } 259 }
229 } 260 }
@@ -239,29 +270,34 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer,
239 out_wavebuffer.buffer_size = in_wave_buffer.buffer_size; 270 out_wavebuffer.buffer_size = in_wave_buffer.buffer_size;
240 out_wavebuffer.context_address = in_wave_buffer.context_address; 271 out_wavebuffer.context_address = in_wave_buffer.context_address;
241 out_wavebuffer.context_size = in_wave_buffer.context_size; 272 out_wavebuffer.context_size = in_wave_buffer.context_size;
273 out_wavebuffer.loop_start_sample = in_wave_buffer.loop_start_sample;
274 out_wavebuffer.loop_end_sample = in_wave_buffer.loop_end_sample;
242 in_params.buffer_mapped = 275 in_params.buffer_mapped =
243 in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0; 276 in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0;
244 // TODO(ogniK): Pool mapper attachment 277 // TODO(ogniK): Pool mapper attachment
245 // TODO(ogniK): IsAdpcmLoopContextBugFixed 278 // TODO(ogniK): IsAdpcmLoopContextBugFixed
279 if (sample_format == SampleFormat::Adpcm && in_wave_buffer.context_address != 0 &&
280 in_wave_buffer.context_size != 0 && behavior_info.IsAdpcmLoopContextBugFixed()) {
281 } else {
282 out_wavebuffer.context_address = 0;
283 out_wavebuffer.context_size = 0;
284 }
246 } 285 }
247} 286}
248 287
249void ServerVoiceInfo::WriteOutStatus( 288void ServerVoiceInfo::WriteOutStatus(
250 VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in, 289 VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in,
251 std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) { 290 std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) {
252 if (voice_in.is_new) { 291 if (voice_in.is_new || in_params.is_new) {
253 in_params.is_new = true; 292 in_params.is_new = true;
254 voice_out.wave_buffer_consumed = 0; 293 voice_out.wave_buffer_consumed = 0;
255 voice_out.played_sample_count = 0; 294 voice_out.played_sample_count = 0;
256 voice_out.voice_dropped = false; 295 voice_out.voice_dropped = false;
257 } else if (!in_params.is_new) {
258 voice_out.wave_buffer_consumed = voice_states[0]->wave_buffer_consumed;
259 voice_out.played_sample_count = voice_states[0]->played_sample_count;
260 voice_out.voice_dropped = in_params.voice_drop_flag;
261 } else { 296 } else {
262 voice_out.wave_buffer_consumed = 0; 297 const auto& state = voice_states[0];
263 voice_out.played_sample_count = 0; 298 voice_out.wave_buffer_consumed = state->wave_buffer_consumed;
264 voice_out.voice_dropped = false; 299 voice_out.played_sample_count = state->played_sample_count;
300 voice_out.voice_dropped = state->voice_dropped;
265 } 301 }
266} 302}
267 303
@@ -283,7 +319,8 @@ ServerVoiceInfo::OutParams& ServerVoiceInfo::GetOutParams() {
283 319
284bool ServerVoiceInfo::ShouldSkip() const { 320bool ServerVoiceInfo::ShouldSkip() const {
285 // TODO(ogniK): Handle unmapped wave buffers or parameters 321 // TODO(ogniK): Handle unmapped wave buffers or parameters
286 return !in_params.in_use || (in_params.wave_buffer_count == 0) || in_params.voice_drop_flag; 322 return !in_params.in_use || in_params.wave_buffer_count == 0 || !in_params.buffer_mapped ||
323 in_params.voice_drop_flag;
287} 324}
288 325
289bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) { 326bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) {
@@ -381,7 +418,7 @@ bool ServerVoiceInfo::UpdateParametersForCommandGeneration(
381void ServerVoiceInfo::FlushWaveBuffers( 418void ServerVoiceInfo::FlushWaveBuffers(
382 u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states, 419 u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
383 s32 channel_count) { 420 s32 channel_count) {
384 auto wave_head = in_params.wave_bufffer_head; 421 auto wave_head = in_params.wave_buffer_head;
385 422
386 for (u8 i = 0; i < flush_count; i++) { 423 for (u8 i = 0; i < flush_count; i++) {
387 in_params.wave_buffer[wave_head].sent_to_dsp = true; 424 in_params.wave_buffer[wave_head].sent_to_dsp = true;
@@ -401,6 +438,17 @@ bool ServerVoiceInfo::HasValidWaveBuffer(const VoiceState* state) const {
401 return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end(); 438 return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end();
402} 439}
403 440
441void ServerVoiceInfo::SetWaveBufferCompleted(VoiceState& dsp_state,
442 const ServerWaveBuffer& wave_buffer) {
443 dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false;
444 dsp_state.wave_buffer_consumed++;
445 dsp_state.wave_buffer_index = (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS;
446 dsp_state.loop_count = 0;
447 if (wave_buffer.end_of_stream) {
448 dsp_state.played_sample_count = 0;
449 }
450}
451
404VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} { 452VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} {
405 for (std::size_t i = 0; i < voice_count; i++) { 453 for (std::size_t i = 0; i < voice_count; i++) {
406 voice_channel_resources.emplace_back(static_cast<s32>(i)); 454 voice_channel_resources.emplace_back(static_cast<s32>(i));
diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h
index 70359cadb..e1050897b 100644
--- a/src/audio_core/voice_context.h
+++ b/src/audio_core/voice_context.h
@@ -60,10 +60,12 @@ struct WaveBuffer {
60 u8 is_looping{}; 60 u8 is_looping{};
61 u8 end_of_stream{}; 61 u8 end_of_stream{};
62 u8 sent_to_server{}; 62 u8 sent_to_server{};
63 INSERT_PADDING_BYTES(5); 63 INSERT_PADDING_BYTES(1);
64 s32 loop_count{};
64 u64 context_address{}; 65 u64 context_address{};
65 u64 context_size{}; 66 u64 context_size{};
66 INSERT_PADDING_BYTES(8); 67 u32 loop_start_sample{};
68 u32 loop_end_sample{};
67}; 69};
68static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size"); 70static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size");
69 71
@@ -76,6 +78,9 @@ struct ServerWaveBuffer {
76 bool end_of_stream{}; 78 bool end_of_stream{};
77 VAddr context_address{}; 79 VAddr context_address{};
78 std::size_t context_size{}; 80 std::size_t context_size{};
81 s32 loop_count{};
82 u32 loop_start_sample{};
83 u32 loop_end_sample{};
79 bool sent_to_dsp{true}; 84 bool sent_to_dsp{true};
80}; 85};
81 86
@@ -108,6 +113,7 @@ struct VoiceState {
108 u32 external_context_size; 113 u32 external_context_size;
109 bool is_external_context_used; 114 bool is_external_context_used;
110 bool voice_dropped; 115 bool voice_dropped;
116 s32 loop_count;
111}; 117};
112 118
113class VoiceChannelResource { 119class VoiceChannelResource {
@@ -206,7 +212,7 @@ public:
206 float last_volume{}; 212 float last_volume{};
207 std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{}; 213 std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{};
208 s32 wave_buffer_count{}; 214 s32 wave_buffer_count{};
209 s16 wave_bufffer_head{}; 215 s16 wave_buffer_head{};
210 INSERT_PADDING_BYTES(2); 216 INSERT_PADDING_BYTES(2);
211 BehaviorFlags behavior_flags{}; 217 BehaviorFlags behavior_flags{};
212 VAddr additional_params_address{}; 218 VAddr additional_params_address{};
@@ -252,6 +258,7 @@ public:
252 void FlushWaveBuffers(u8 flush_count, 258 void FlushWaveBuffers(u8 flush_count,
253 std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states, 259 std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states,
254 s32 channel_count); 260 s32 channel_count);
261 void SetWaveBufferCompleted(VoiceState& dsp_state, const ServerWaveBuffer& wave_buffer);
255 262
256private: 263private:
257 std::vector<s16> stored_samples; 264 std::vector<s16> stored_samples;
diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp
index 077f34995..274f57659 100644
--- a/src/common/fs/file.cpp
+++ b/src/common/fs/file.cpp
@@ -306,9 +306,9 @@ bool IOFile::Flush() const {
306 errno = 0; 306 errno = 0;
307 307
308#ifdef _WIN32 308#ifdef _WIN32
309 const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0; 309 const auto flush_result = std::fflush(file) == 0;
310#else 310#else
311 const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0; 311 const auto flush_result = std::fflush(file) == 0;
312#endif 312#endif
313 313
314 if (!flush_result) { 314 if (!flush_result) {
@@ -320,6 +320,28 @@ bool IOFile::Flush() const {
320 return flush_result; 320 return flush_result;
321} 321}
322 322
323bool IOFile::Commit() const {
324 if (!IsOpen()) {
325 return false;
326 }
327
328 errno = 0;
329
330#ifdef _WIN32
331 const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
332#else
333 const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
334#endif
335
336 if (!commit_result) {
337 const auto ec = std::error_code{errno, std::generic_category()};
338 LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}",
339 PathToUTF8String(file_path), ec.message());
340 }
341
342 return commit_result;
343}
344
323bool IOFile::SetSize(u64 size) const { 345bool IOFile::SetSize(u64 size) const {
324 if (!IsOpen()) { 346 if (!IsOpen()) {
325 return false; 347 return false;
@@ -347,6 +369,9 @@ u64 IOFile::GetSize() const {
347 return 0; 369 return 0;
348 } 370 }
349 371
372 // Flush any unwritten buffered data into the file prior to retrieving the file size.
373 std::fflush(file);
374
350 std::error_code ec; 375 std::error_code ec;
351 376
352 const auto file_size = fs::file_size(file_path, ec); 377 const auto file_size = fs::file_size(file_path, ec);
diff --git a/src/common/fs/file.h b/src/common/fs/file.h
index 588fe619d..2c4ab4332 100644
--- a/src/common/fs/file.h
+++ b/src/common/fs/file.h
@@ -396,13 +396,22 @@ public:
396 [[nodiscard]] size_t WriteString(std::span<const char> string) const; 396 [[nodiscard]] size_t WriteString(std::span<const char> string) const;
397 397
398 /** 398 /**
399 * Attempts to flush any unwritten buffered data into the file and flush the file into the disk. 399 * Attempts to flush any unwritten buffered data into the file.
400 * 400 *
401 * @returns True if the flush was successful, false otherwise. 401 * @returns True if the flush was successful, false otherwise.
402 */ 402 */
403 bool Flush() const; 403 bool Flush() const;
404 404
405 /** 405 /**
406 * Attempts to commit the file into the disk.
407 * Note that this is an expensive operation as this forces the operating system to write
408 * the contents of the file associated with the file descriptor into the disk.
409 *
410 * @returns True if the commit was successful, false otherwise.
411 */
412 bool Commit() const;
413
414 /**
406 * Resizes the file to a given size. 415 * Resizes the file to a given size.
407 * If the file is resized to a smaller size, the remainder of the file is discarded. 416 * If the file is resized to a smaller size, the remainder of the file is discarded.
408 * If the file is resized to a larger size, the new area appears as if zero-filled. 417 * If the file is resized to a larger size, the new area appears as if zero-filled.
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b6fa4affb..61dddab3f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
171FileBackend::~FileBackend() = default; 171FileBackend::~FileBackend() = default;
172 172
173void FileBackend::Write(const Entry& entry) { 173void FileBackend::Write(const Entry& entry) {
174 if (!file->IsOpen()) {
175 return;
176 }
177
174 using namespace Common::Literals; 178 using namespace Common::Literals;
175 // prevent logs from going over the maximum size (in case its spamming and the user doesn't 179 // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
176 // know)
177 constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB; 180 constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
178 constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB; 181 constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
179 182
180 if (!file->IsOpen()) { 183 const bool write_limit_exceeded =
181 return; 184 bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
182 } 185 (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
183 186
184 if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) { 187 // Close the file after the write limit is exceeded.
185 return; 188 if (write_limit_exceeded) {
186 } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) { 189 file->Close();
187 return; 190 return;
188 } 191 }
189 192
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 83b5b7676..b2b0dbe05 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -654,24 +654,19 @@ endif()
654 654
655if (MSVC) 655if (MSVC)
656 target_compile_options(core PRIVATE 656 target_compile_options(core PRIVATE
657 /we4018 # 'expression' : signed/unsigned mismatch 657 /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
658 /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) 658 /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
659 /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch 659 /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
660 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data 660 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
661 /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
662 /we4305 # 'context' : truncation from 'type1' to 'type2'
663 /we4456 # Declaration of 'identifier' hides previous local declaration 661 /we4456 # Declaration of 'identifier' hides previous local declaration
664 /we4457 # Declaration of 'identifier' hides function parameter 662 /we4457 # Declaration of 'identifier' hides function parameter
665 /we4458 # Declaration of 'identifier' hides class member 663 /we4458 # Declaration of 'identifier' hides class member
666 /we4459 # Declaration of 'identifier' hides global declaration 664 /we4459 # Declaration of 'identifier' hides global declaration
667 /we4715 # 'function' : not all control paths return a value
668 ) 665 )
669else() 666else()
670 target_compile_options(core PRIVATE 667 target_compile_options(core PRIVATE
671 -Werror=conversion 668 -Werror=conversion
672 -Werror=ignored-qualifiers 669 -Werror=ignored-qualifiers
673 -Werror=implicit-fallthrough
674 -Werror=sign-compare
675 -Werror=shadow 670 -Werror=shadow
676 671
677 $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> 672 $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 53b8b7ca0..7c0950bb0 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -345,8 +345,10 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
345static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type, 345static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type,
346 const Service::FileSystem::FileSystemController& fs_controller) { 346 const Service::FileSystem::FileSystemController& fs_controller) {
347 const auto load_dir = fs_controller.GetModificationLoadRoot(title_id); 347 const auto load_dir = fs_controller.GetModificationLoadRoot(title_id);
348 const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
348 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || 349 if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
349 load_dir == nullptr || load_dir->GetSize() <= 0) { 350 ((load_dir == nullptr || load_dir->GetSize() <= 0) &&
351 (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) {
350 return; 352 return;
351 } 353 }
352 354
@@ -356,7 +358,10 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
356 } 358 }
357 359
358 const auto& disabled = Settings::values.disabled_addons[title_id]; 360 const auto& disabled = Settings::values.disabled_addons[title_id];
359 auto patch_dirs = load_dir->GetSubdirectories(); 361 std::vector<VirtualDir> patch_dirs = load_dir->GetSubdirectories();
362 if (std::find(disabled.cbegin(), disabled.cend(), "SDMC") == disabled.cend()) {
363 patch_dirs.push_back(sdmc_load_dir);
364 }
360 std::sort(patch_dirs.begin(), patch_dirs.end(), 365 std::sort(patch_dirs.begin(), patch_dirs.end(),
361 [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); }); 366 [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
362 367
@@ -402,7 +407,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
402} 407}
403 408
404VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type, 409VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type,
405 VirtualFile update_raw) const { 410 VirtualFile update_raw, bool apply_layeredfs) const {
406 const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}", 411 const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}",
407 title_id, static_cast<u8>(type)); 412 title_id, static_cast<u8>(type));
408 413
@@ -442,7 +447,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
442 } 447 }
443 448
444 // LayeredFS 449 // LayeredFS
445 ApplyLayeredFS(romfs, title_id, type, fs_controller); 450 if (apply_layeredfs) {
451 ApplyLayeredFS(romfs, title_id, type, fs_controller);
452 }
446 453
447 return romfs; 454 return romfs;
448} 455}
@@ -524,6 +531,15 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u
524 } 531 }
525 } 532 }
526 533
534 // SDMC mod directory (RomFS LayeredFS)
535 const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id);
536 if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0 &&
537 IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "romfs"))) {
538 const auto mod_disabled =
539 std::find(disabled.begin(), disabled.end(), "SDMC") != disabled.end();
540 out.insert_or_assign(mod_disabled ? "[D] SDMC" : "SDMC", "LayeredFS");
541 }
542
527 // DLC 543 // DLC
528 const auto dlc_entries = 544 const auto dlc_entries =
529 content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data); 545 content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index fb1853035..3be871f35 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -64,7 +64,8 @@ public:
64 // - LayeredFS 64 // - LayeredFS
65 [[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset, 65 [[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset,
66 ContentRecordType type = ContentRecordType::Program, 66 ContentRecordType type = ContentRecordType::Program,
67 VirtualFile update_raw = nullptr) const; 67 VirtualFile update_raw = nullptr,
68 bool apply_layeredfs = true) const;
68 69
69 // Returns a vector of pairs between patch names and patch versions. 70 // Returns a vector of pairs between patch names and patch versions.
70 // i.e. Update 3.2.2 will return {"Update", "3.2.2"} 71 // i.e. Update 3.2.2 will return {"Update", "3.2.2"}
diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp
index cb56d8f2d..e5c72cd4d 100644
--- a/src/core/file_sys/sdmc_factory.cpp
+++ b/src/core/file_sys/sdmc_factory.cpp
@@ -12,23 +12,32 @@ namespace FileSys {
12 12
13constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB 13constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB
14 14
15SDMCFactory::SDMCFactory(VirtualDir dir_) 15SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_)
16 : dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>( 16 : sd_dir(std::move(sd_dir_)), sd_mod_dir(std::move(sd_mod_dir_)),
17 GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"), 17 contents(std::make_unique<RegisteredCache>(
18 [](const VirtualFile& file, const NcaID& id) { 18 GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/registered"),
19 return NAX{file, id}.GetDecrypted(); 19 [](const VirtualFile& file, const NcaID& id) {
20 })), 20 return NAX{file, id}.GetDecrypted();
21 })),
21 placeholder(std::make_unique<PlaceholderCache>( 22 placeholder(std::make_unique<PlaceholderCache>(
22 GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/placehld"))) {} 23 GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/placehld"))) {}
23 24
24SDMCFactory::~SDMCFactory() = default; 25SDMCFactory::~SDMCFactory() = default;
25 26
26ResultVal<VirtualDir> SDMCFactory::Open() const { 27ResultVal<VirtualDir> SDMCFactory::Open() const {
27 return MakeResult<VirtualDir>(dir); 28 return MakeResult<VirtualDir>(sd_dir);
29}
30
31VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const {
32 // LayeredFS doesn't work on updates and title id-less homebrew
33 if (title_id == 0 || (title_id & 0xFFF) == 0x800) {
34 return nullptr;
35 }
36 return GetOrCreateDirectoryRelative(sd_mod_dir, fmt::format("/{:016X}", title_id));
28} 37}
29 38
30VirtualDir SDMCFactory::GetSDMCContentDirectory() const { 39VirtualDir SDMCFactory::GetSDMCContentDirectory() const {
31 return GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents"); 40 return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents");
32} 41}
33 42
34RegisteredCache* SDMCFactory::GetSDMCContents() const { 43RegisteredCache* SDMCFactory::GetSDMCContents() const {
@@ -40,11 +49,11 @@ PlaceholderCache* SDMCFactory::GetSDMCPlaceholder() const {
40} 49}
41 50
42VirtualDir SDMCFactory::GetImageDirectory() const { 51VirtualDir SDMCFactory::GetImageDirectory() const {
43 return GetOrCreateDirectoryRelative(dir, "/Nintendo/Album"); 52 return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Album");
44} 53}
45 54
46u64 SDMCFactory::GetSDMCFreeSpace() const { 55u64 SDMCFactory::GetSDMCFreeSpace() const {
47 return GetSDMCTotalSpace() - dir->GetSize(); 56 return GetSDMCTotalSpace() - sd_dir->GetSize();
48} 57}
49 58
50u64 SDMCFactory::GetSDMCTotalSpace() const { 59u64 SDMCFactory::GetSDMCTotalSpace() const {
diff --git a/src/core/file_sys/sdmc_factory.h b/src/core/file_sys/sdmc_factory.h
index 2bb92ba93..3a3d11f3a 100644
--- a/src/core/file_sys/sdmc_factory.h
+++ b/src/core/file_sys/sdmc_factory.h
@@ -16,11 +16,12 @@ class PlaceholderCache;
16/// File system interface to the SDCard archive 16/// File system interface to the SDCard archive
17class SDMCFactory { 17class SDMCFactory {
18public: 18public:
19 explicit SDMCFactory(VirtualDir dir); 19 explicit SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_);
20 ~SDMCFactory(); 20 ~SDMCFactory();
21 21
22 ResultVal<VirtualDir> Open() const; 22 ResultVal<VirtualDir> Open() const;
23 23
24 VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
24 VirtualDir GetSDMCContentDirectory() const; 25 VirtualDir GetSDMCContentDirectory() const;
25 26
26 RegisteredCache* GetSDMCContents() const; 27 RegisteredCache* GetSDMCContents() const;
@@ -32,7 +33,8 @@ public:
32 u64 GetSDMCTotalSpace() const; 33 u64 GetSDMCTotalSpace() const;
33 34
34private: 35private:
35 VirtualDir dir; 36 VirtualDir sd_dir;
37 VirtualDir sd_mod_dir;
36 38
37 std::unique_ptr<RegisteredCache> contents; 39 std::unique_ptr<RegisteredCache> contents;
38 std::unique_ptr<PlaceholderCache> placeholder; 40 std::unique_ptr<PlaceholderCache> placeholder;
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 61bda3786..ceff2532d 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -345,8 +345,12 @@ public:
345 explicit RequestParser(u32* command_buffer) : RequestHelperBase(command_buffer) {} 345 explicit RequestParser(u32* command_buffer) : RequestHelperBase(command_buffer) {}
346 346
347 explicit RequestParser(Kernel::HLERequestContext& ctx) : RequestHelperBase(ctx) { 347 explicit RequestParser(Kernel::HLERequestContext& ctx) : RequestHelperBase(ctx) {
348 ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete"); 348 // TIPC does not have data payload offset
349 Skip(ctx.GetDataPayloadOffset(), false); 349 if (!ctx.IsTipc()) {
350 ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete");
351 Skip(ctx.GetDataPayloadOffset(), false);
352 }
353
350 // Skip the u64 command id, it's already stored in the context 354 // Skip the u64 command id, it's already stored in the context
351 static constexpr u32 CommandIdSize = 2; 355 static constexpr u32 CommandIdSize = 2;
352 Skip(CommandIdSize, false); 356 Skip(CommandIdSize, false);
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index fec704c65..dd945e058 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -117,7 +117,7 @@ AOC_U::AOC_U(Core::System& system_)
117 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, 117 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
118 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, 118 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
119 {9, nullptr, "GetAddOnContentLostErrorCode"}, 119 {9, nullptr, "GetAddOnContentLostErrorCode"},
120 {10, nullptr, "GetAddOnContentListChangedEventWithProcessId"}, 120 {10, &AOC_U::GetAddOnContentListChangedEventWithProcessId, "GetAddOnContentListChangedEventWithProcessId"},
121 {100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"}, 121 {100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"},
122 {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"}, 122 {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"},
123 {110, nullptr, "CreateContentsServiceManager"}, 123 {110, nullptr, "CreateContentsServiceManager"},
@@ -257,6 +257,14 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) {
257 rb.PushCopyObjects(aoc_change_event.GetReadableEvent()); 257 rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
258} 258}
259 259
260void AOC_U::GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx) {
261 LOG_WARNING(Service_AOC, "(STUBBED) called");
262
263 IPC::ResponseBuilder rb{ctx, 2, 1};
264 rb.Push(ResultSuccess);
265 rb.PushCopyObjects(aoc_change_event.GetReadableEvent());
266}
267
260void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) { 268void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) {
261 LOG_WARNING(Service_AOC, "(STUBBED) called"); 269 LOG_WARNING(Service_AOC, "(STUBBED) called");
262 270
diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h
index 65095baa2..bb6ffb8eb 100644
--- a/src/core/hle/service/aoc/aoc_u.h
+++ b/src/core/hle/service/aoc/aoc_u.h
@@ -28,6 +28,7 @@ private:
28 void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx); 28 void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx);
29 void PrepareAddOnContent(Kernel::HLERequestContext& ctx); 29 void PrepareAddOnContent(Kernel::HLERequestContext& ctx);
30 void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx); 30 void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx);
31 void GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx);
31 void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx); 32 void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
32 void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx); 33 void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx);
33 34
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 800feba6e..b769fe959 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -96,7 +96,7 @@ private:
96 void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { 96 void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
97 LOG_DEBUG(Service_Audio, "(STUBBED) called"); 97 LOG_DEBUG(Service_Audio, "(STUBBED) called");
98 98
99 std::vector<u8> output_params(ctx.GetWriteBufferSize()); 99 std::vector<u8> output_params(ctx.GetWriteBufferSize(), 0);
100 auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params); 100 auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params);
101 101
102 if (result.IsSuccess()) { 102 if (result.IsSuccess()) {
@@ -110,17 +110,19 @@ private:
110 void Start(Kernel::HLERequestContext& ctx) { 110 void Start(Kernel::HLERequestContext& ctx) {
111 LOG_WARNING(Service_Audio, "(STUBBED) called"); 111 LOG_WARNING(Service_Audio, "(STUBBED) called");
112 112
113 IPC::ResponseBuilder rb{ctx, 2}; 113 const auto result = renderer->Start();
114 114
115 rb.Push(ResultSuccess); 115 IPC::ResponseBuilder rb{ctx, 2};
116 rb.Push(result);
116 } 117 }
117 118
118 void Stop(Kernel::HLERequestContext& ctx) { 119 void Stop(Kernel::HLERequestContext& ctx) {
119 LOG_WARNING(Service_Audio, "(STUBBED) called"); 120 LOG_WARNING(Service_Audio, "(STUBBED) called");
120 121
121 IPC::ResponseBuilder rb{ctx, 2}; 122 const auto result = renderer->Stop();
122 123
123 rb.Push(ResultSuccess); 124 IPC::ResponseBuilder rb{ctx, 2};
125 rb.Push(result);
124 } 126 }
125 127
126 void QuerySystemEvent(Kernel::HLERequestContext& ctx) { 128 void QuerySystemEvent(Kernel::HLERequestContext& ctx) {
@@ -288,7 +290,7 @@ private:
288 290
289 IPC::ResponseBuilder rb{ctx, 3}; 291 IPC::ResponseBuilder rb{ctx, 3};
290 rb.Push(ResultSuccess); 292 rb.Push(ResultSuccess);
291 rb.Push<u32>(1); 293 rb.Push<u32>(2);
292 } 294 }
293 295
294 // Should be similar to QueryAudioDeviceOutputEvent 296 // Should be similar to QueryAudioDeviceOutputEvent
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 10e6f7a64..33a6dbbb6 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -253,7 +253,11 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
253 rb.Push<u32>(worker_buffer_sz); 253 rb.Push<u32>(worker_buffer_sz);
254} 254}
255 255
256void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { 256void HwOpus::GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx) {
257 GetWorkBufferSize(ctx);
258}
259
260void HwOpus::OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx) {
257 IPC::RequestParser rp{ctx}; 261 IPC::RequestParser rp{ctx};
258 const auto sample_rate = rp.Pop<u32>(); 262 const auto sample_rate = rp.Pop<u32>();
259 const auto channel_count = rp.Pop<u32>(); 263 const auto channel_count = rp.Pop<u32>();
@@ -291,14 +295,47 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
291 system, OpusDecoderState{std::move(decoder), sample_rate, channel_count}); 295 system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
292} 296}
293 297
298void HwOpus::OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx) {
299 IPC::RequestParser rp{ctx};
300 const auto sample_rate = rp.Pop<u32>();
301 const auto channel_count = rp.Pop<u32>();
302
303 LOG_CRITICAL(Audio, "called sample_rate={}, channel_count={}", sample_rate, channel_count);
304
305 ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
306 sample_rate == 12000 || sample_rate == 8000,
307 "Invalid sample rate");
308 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
309
310 const int num_stereo_streams = channel_count == 2 ? 1 : 0;
311 const auto mapping_table = CreateMappingTable(channel_count);
312
313 int error = 0;
314 OpusDecoderPtr decoder{
315 opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
316 num_stereo_streams, mapping_table.data(), &error)};
317 if (error != OPUS_OK || decoder == nullptr) {
318 LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
319 IPC::ResponseBuilder rb{ctx, 2};
320 // TODO(ogniK): Use correct error code
321 rb.Push(ResultUnknown);
322 return;
323 }
324
325 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
326 rb.Push(ResultSuccess);
327 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
328 system, OpusDecoderState{std::move(decoder), sample_rate, channel_count});
329}
330
294HwOpus::HwOpus(Core::System& system_) : ServiceFramework{system_, "hwopus"} { 331HwOpus::HwOpus(Core::System& system_) : ServiceFramework{system_, "hwopus"} {
295 static const FunctionInfo functions[] = { 332 static const FunctionInfo functions[] = {
296 {0, &HwOpus::OpenOpusDecoder, "OpenOpusDecoder"}, 333 {0, &HwOpus::OpenHardwareOpusDecoder, "OpenHardwareOpusDecoder"},
297 {1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"}, 334 {1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"},
298 {2, nullptr, "OpenOpusDecoderForMultiStream"}, 335 {2, nullptr, "OpenOpusDecoderForMultiStream"},
299 {3, nullptr, "GetWorkBufferSizeForMultiStream"}, 336 {3, nullptr, "GetWorkBufferSizeForMultiStream"},
300 {4, nullptr, "OpenHardwareOpusDecoderEx"}, 337 {4, &HwOpus::OpenHardwareOpusDecoderEx, "OpenHardwareOpusDecoderEx"},
301 {5, nullptr, "GetWorkBufferSizeEx"}, 338 {5, &HwOpus::GetWorkBufferSizeEx, "GetWorkBufferSizeEx"},
302 {6, nullptr, "OpenHardwareOpusDecoderForMultiStreamEx"}, 339 {6, nullptr, "OpenHardwareOpusDecoderForMultiStreamEx"},
303 {7, nullptr, "GetWorkBufferSizeForMultiStreamEx"}, 340 {7, nullptr, "GetWorkBufferSizeForMultiStreamEx"},
304 }; 341 };
diff --git a/src/core/hle/service/audio/hwopus.h b/src/core/hle/service/audio/hwopus.h
index 4f921f18e..b74824ff3 100644
--- a/src/core/hle/service/audio/hwopus.h
+++ b/src/core/hle/service/audio/hwopus.h
@@ -18,8 +18,10 @@ public:
18 ~HwOpus() override; 18 ~HwOpus() override;
19 19
20private: 20private:
21 void OpenOpusDecoder(Kernel::HLERequestContext& ctx); 21 void OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx);
22 void OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx);
22 void GetWorkBufferSize(Kernel::HLERequestContext& ctx); 23 void GetWorkBufferSize(Kernel::HLERequestContext& ctx);
24 void GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx);
23}; 25};
24 26
25} // namespace Service::Audio 27} // namespace Service::Audio
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 3c16fe6c7..4a9b13e45 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -703,6 +703,16 @@ FileSys::VirtualDir FileSystemController::GetModificationLoadRoot(u64 title_id)
703 return bis_factory->GetModificationLoadRoot(title_id); 703 return bis_factory->GetModificationLoadRoot(title_id);
704} 704}
705 705
706FileSys::VirtualDir FileSystemController::GetSDMCModificationLoadRoot(u64 title_id) const {
707 LOG_TRACE(Service_FS, "Opening SDMC mod load root for tid={:016X}", title_id);
708
709 if (sdmc_factory == nullptr) {
710 return nullptr;
711 }
712
713 return sdmc_factory->GetSDMCModificationLoadRoot(title_id);
714}
715
706FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const { 716FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const {
707 LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id); 717 LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id);
708 718
@@ -733,20 +743,23 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
733 } 743 }
734 744
735 using YuzuPath = Common::FS::YuzuPath; 745 using YuzuPath = Common::FS::YuzuPath;
746 const auto sdmc_dir_path = Common::FS::GetYuzuPath(YuzuPath::SDMCDir);
747 const auto sdmc_load_dir_path = sdmc_dir_path / "atmosphere/contents";
736 const auto rw_mode = FileSys::Mode::ReadWrite; 748 const auto rw_mode = FileSys::Mode::ReadWrite;
737 749
738 auto nand_directory = 750 auto nand_directory =
739 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode); 751 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode);
740 auto sd_directory = 752 auto sd_directory = vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_dir_path), rw_mode);
741 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::SDMCDir), rw_mode);
742 auto load_directory = 753 auto load_directory =
743 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read); 754 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read);
755 auto sd_load_directory =
756 vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_load_dir_path), FileSys::Mode::Read);
744 auto dump_directory = 757 auto dump_directory =
745 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode); 758 vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode);
746 759
747 if (bis_factory == nullptr) { 760 if (bis_factory == nullptr) {
748 bis_factory = 761 bis_factory = std::make_unique<FileSys::BISFactory>(
749 std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory); 762 nand_directory, std::move(load_directory), std::move(dump_directory));
750 system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND, 763 system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND,
751 bis_factory->GetSystemNANDContents()); 764 bis_factory->GetSystemNANDContents());
752 system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND, 765 system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND,
@@ -759,7 +772,8 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
759 } 772 }
760 773
761 if (sdmc_factory == nullptr) { 774 if (sdmc_factory == nullptr) {
762 sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory)); 775 sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory),
776 std::move(sd_load_directory));
763 system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC, 777 system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
764 sdmc_factory->GetSDMCContents()); 778 sdmc_factory->GetSDMCContents());
765 } 779 }
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index b6b1b9220..d387af3cb 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -115,6 +115,7 @@ public:
115 FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const; 115 FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const;
116 FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const; 116 FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const;
117 117
118 FileSys::VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const;
118 FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const; 119 FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const;
119 FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const; 120 FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const;
120 121
diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp
index 114aff31c..869d2763f 100644
--- a/src/core/hle/service/mii/manager.cpp
+++ b/src/core/hle/service/mii/manager.cpp
@@ -20,6 +20,7 @@ namespace {
20 20
21constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4}; 21constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4};
22 22
23constexpr std::size_t BaseMiiCount{2};
23constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()}; 24constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()};
24 25
25constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'}; 26constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'};
@@ -415,7 +416,7 @@ u32 MiiManager::GetCount(SourceFlag source_flag) const {
415 count += 0; 416 count += 0;
416 } 417 }
417 if ((source_flag & SourceFlag::Default) != SourceFlag::None) { 418 if ((source_flag & SourceFlag::Default) != SourceFlag::None) {
418 count += DefaultMiiCount; 419 count += (DefaultMiiCount - BaseMiiCount);
419 } 420 }
420 return static_cast<u32>(count); 421 return static_cast<u32>(count);
421} 422}
@@ -445,7 +446,7 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_
445 return MakeResult(std::move(result)); 446 return MakeResult(std::move(result));
446 } 447 }
447 448
448 for (std::size_t index = 0; index < DefaultMiiCount; index++) { 449 for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) {
449 result.emplace_back(BuildDefault(index), Source::Default); 450 result.emplace_back(BuildDefault(index), Source::Default);
450 } 451 }
451 452
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index 7c5763f9c..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -34,28 +34,17 @@ if (MSVC)
34 /W4 34 /W4
35 /WX 35 /WX
36 36
37 # 'expression' : signed/unsigned mismatch 37 /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
38 /we4018 38 /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
39 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) 39 /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch
40 /we4244 40 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
41 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch
42 /we4245
43 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
44 /we4254
45 # 'var' : conversion from 'size_t' to 'type', possible loss of data
46 /we4267
47 # 'context' : truncation from 'type1' to 'type2'
48 /we4305
49 ) 41 )
50else() 42else()
51 target_compile_options(input_common PRIVATE 43 target_compile_options(input_common PRIVATE
52 -Werror 44 -Werror
53 -Werror=conversion 45 -Werror=conversion
54 -Werror=ignored-qualifiers 46 -Werror=ignored-qualifiers
55 -Werror=implicit-fallthrough
56 -Werror=reorder
57 -Werror=shadow 47 -Werror=shadow
58 -Werror=sign-compare
59 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> 48 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
60 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> 49 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
61 -Werror=unused-variable 50 -Werror=unused-variable
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index 100138d11..2fafd077f 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -27,6 +27,7 @@ public:
27 down->SetCallback(callbacks); 27 down->SetCallback(callbacks);
28 left->SetCallback(callbacks); 28 left->SetCallback(callbacks);
29 right->SetCallback(callbacks); 29 right->SetCallback(callbacks);
30 modifier->SetCallback(callbacks);
30 } 31 }
31 32
32 bool IsAngleGreater(float old_angle, float new_angle) const { 33 bool IsAngleGreater(float old_angle, float new_angle) const {
diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp
index 320f51ee6..a2f1bb67c 100644
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -5,14 +5,7 @@
5#include <chrono> 5#include <chrono>
6#include <thread> 6#include <thread>
7 7
8#ifdef _MSC_VER
9#pragma warning(push)
10#pragma warning(disable : 4200) // nonstandard extension used : zero-sized array in struct/union
11#endif
12#include <libusb.h> 8#include <libusb.h>
13#ifdef _MSC_VER
14#pragma warning(pop)
15#endif
16 9
17#include "common/logging/log.h" 10#include "common/logging/log.h"
18#include "common/param_package.h" 11#include "common/param_package.h"
diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h
index a3d276697..1bdc9209e 100644
--- a/src/input_common/udp/protocol.h
+++ b/src/input_common/udp/protocol.h
@@ -8,14 +8,7 @@
8#include <optional> 8#include <optional>
9#include <type_traits> 9#include <type_traits>
10 10
11#ifdef _MSC_VER
12#pragma warning(push)
13#pragma warning(disable : 4701)
14#endif
15#include <boost/crc.hpp> 11#include <boost/crc.hpp>
16#ifdef _MSC_VER
17#pragma warning(pop)
18#endif
19 12
20#include "common/bit_field.h" 13#include "common/bit_field.h"
21#include "common/swap.h" 14#include "common/swap.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f9454bbaa..e4de55f4d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -292,12 +292,12 @@ endif()
292 292
293if (MSVC) 293if (MSVC)
294 target_compile_options(video_core PRIVATE 294 target_compile_options(video_core PRIVATE
295 /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data 295 /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
296 /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data
296 /we4456 # Declaration of 'identifier' hides previous local declaration 297 /we4456 # Declaration of 'identifier' hides previous local declaration
297 /we4457 # Declaration of 'identifier' hides function parameter 298 /we4457 # Declaration of 'identifier' hides function parameter
298 /we4458 # Declaration of 'identifier' hides class member 299 /we4458 # Declaration of 'identifier' hides class member
299 /we4459 # Declaration of 'identifier' hides global declaration 300 /we4459 # Declaration of 'identifier' hides global declaration
300 /we4715 # 'function' : not all control paths return a value
301 ) 301 )
302else() 302else()
303 target_compile_options(video_core PRIVATE 303 target_compile_options(video_core PRIVATE
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9d726a6fb..cad7f902d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -99,7 +99,7 @@ class BufferCache {
99 }; 99 };
100 100
101public: 101public:
102 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB; 102 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
103 103
104 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 104 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
105 Tegra::Engines::Maxwell3D& maxwell3d_, 105 Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -109,8 +109,6 @@ public:
109 109
110 void TickFrame(); 110 void TickFrame();
111 111
112 void RunGarbageCollector();
113
114 void WriteMemory(VAddr cpu_addr, u64 size); 112 void WriteMemory(VAddr cpu_addr, u64 size);
115 113
116 void CachedWriteMemory(VAddr cpu_addr, u64 size); 114 void CachedWriteMemory(VAddr cpu_addr, u64 size);
@@ -197,6 +195,8 @@ private:
197 ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); 195 ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
198 } 196 }
199 197
198 void RunGarbageCollector();
199
200 void BindHostIndexBuffer(); 200 void BindHostIndexBuffer();
201 201
202 void BindHostVertexBuffers(); 202 void BindHostVertexBuffers();
@@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
416 416
417template <class P> 417template <class P>
418void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 418void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
419 ForEachBufferInRange(cpu_addr, size, 419 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
420 [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); 420 DownloadBufferMemory(buffer, cpu_addr, size);
421 });
421} 422}
422 423
423template <class P> 424template <class P>
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a3fda1094..8b86ad050 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
103 case ThiMethod::SetMethod1: 103 case ThiMethod::SetMethod1:
104 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", 104 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
105 static_cast<u32>(nvdec_thi_state.method_0)); 105 static_cast<u32>(nvdec_thi_state.method_0));
106 nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), 106 nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
107 data);
108 break; 107 break;
109 default: 108 default:
110 break; 109 break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index d02dc6260..1b4bbc8ac 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
23 av_free(ptr); 23 av_free(ptr);
24} 24}
25 25
26Codec::Codec(GPU& gpu_) 26Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
27 : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), 27 : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
28 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} 28 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
29 29
30Codec::~Codec() { 30Codec::~Codec() {
@@ -43,46 +43,48 @@ Codec::~Codec() {
43 avcodec_close(av_codec_ctx); 43 avcodec_close(av_codec_ctx);
44} 44}
45 45
46void Codec::Initialize() {
47 AVCodecID codec{AV_CODEC_ID_NONE};
48 switch (current_codec) {
49 case NvdecCommon::VideoCodec::H264:
50 codec = AV_CODEC_ID_H264;
51 break;
52 case NvdecCommon::VideoCodec::Vp9:
53 codec = AV_CODEC_ID_VP9;
54 break;
55 default:
56 return;
57 }
58 av_codec = avcodec_find_decoder(codec);
59 av_codec_ctx = avcodec_alloc_context3(av_codec);
60 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
61
62 // TODO(ameerj): libavcodec gpu hw acceleration
63
64 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
65 if (av_error < 0) {
66 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
67 avcodec_close(av_codec_ctx);
68 return;
69 }
70 initialized = true;
71 return;
72}
73
46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { 74void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
47 if (current_codec != codec) { 75 if (current_codec != codec) {
48 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
49 current_codec = codec; 76 current_codec = codec;
77 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
50 } 78 }
51} 79}
52 80
53void Codec::StateWrite(u32 offset, u64 arguments) {
54 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
55 std::memcpy(state_offset, &arguments, sizeof(u64));
56}
57
58void Codec::Decode() { 81void Codec::Decode() {
59 bool is_first_frame = false; 82 const bool is_first_frame = !initialized;
60 if (!initialized) { 83 if (!initialized) {
61 if (current_codec == NvdecCommon::VideoCodec::H264) { 84 Initialize();
62 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
63 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
64 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
65 } else {
66 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
67 return;
68 }
69
70 av_codec_ctx = avcodec_alloc_context3(av_codec);
71 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
72
73 // TODO(ameerj): libavcodec gpu hw acceleration
74
75 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
76 if (av_error < 0) {
77 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
78 avcodec_close(av_codec_ctx);
79 return;
80 }
81 initialized = true;
82 is_first_frame = true;
83 } 85 }
84 bool vp9_hidden_frame = false;
85 86
87 bool vp9_hidden_frame = false;
86 AVPacket packet{}; 88 AVPacket packet{};
87 av_init_packet(&packet); 89 av_init_packet(&packet);
88 std::vector<u8> frame_data; 90 std::vector<u8> frame_data;
@@ -95,7 +97,7 @@ void Codec::Decode() {
95 } 97 }
96 98
97 packet.data = frame_data.data(); 99 packet.data = frame_data.data();
98 packet.size = static_cast<int>(frame_data.size()); 100 packet.size = static_cast<s32>(frame_data.size());
99 101
100 avcodec_send_packet(av_codec_ctx, &packet); 102 avcodec_send_packet(av_codec_ctx, &packet);
101 103
@@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
127 return current_codec; 129 return current_codec;
128} 130}
129 131
132std::string_view Codec::GetCurrentCodecName() const {
133 switch (current_codec) {
134 case NvdecCommon::VideoCodec::None:
135 return "None";
136 case NvdecCommon::VideoCodec::H264:
137 return "H264";
138 case NvdecCommon::VideoCodec::Vp8:
139 return "VP8";
140 case NvdecCommon::VideoCodec::H265:
141 return "H265";
142 case NvdecCommon::VideoCodec::Vp9:
143 return "VP9";
144 default:
145 return "Unknown";
146 }
147};
148
130} // namespace Tegra 149} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 8a2a6c360..96c823c76 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -34,15 +34,15 @@ class VP9;
34 34
35class Codec { 35class Codec {
36public: 36public:
37 explicit Codec(GPU& gpu); 37 explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
38 ~Codec(); 38 ~Codec();
39 39
40 /// Initialize the codec, returning success or failure
41 void Initialize();
42
40 /// Sets NVDEC video stream codec 43 /// Sets NVDEC video stream codec
41 void SetTargetCodec(NvdecCommon::VideoCodec codec); 44 void SetTargetCodec(NvdecCommon::VideoCodec codec);
42 45
43 /// Populate NvdecRegisters state with argument value at the provided offset
44 void StateWrite(u32 offset, u64 arguments);
45
46 /// Call decoders to construct headers, decode AVFrame with ffmpeg 46 /// Call decoders to construct headers, decode AVFrame with ffmpeg
47 void Decode(); 47 void Decode();
48 48
@@ -51,6 +51,8 @@ public:
51 51
52 /// Returns the value of current_codec 52 /// Returns the value of current_codec
53 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; 53 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
54 /// Return name of the current codec
55 [[nodiscard]] std::string_view GetCurrentCodecName() const;
54 56
55private: 57private:
56 bool initialized{}; 58 bool initialized{};
@@ -60,10 +62,10 @@ private:
60 AVCodecContext* av_codec_ctx{nullptr}; 62 AVCodecContext* av_codec_ctx{nullptr};
61 63
62 GPU& gpu; 64 GPU& gpu;
65 const NvdecCommon::NvdecRegisters& state;
63 std::unique_ptr<Decoder::H264> h264_decoder; 66 std::unique_ptr<Decoder::H264> h264_decoder;
64 std::unique_ptr<Decoder::VP9> vp9_decoder; 67 std::unique_ptr<Decoder::VP9> vp9_decoder;
65 68
66 NvdecCommon::NvdecRegisters state{};
67 std::queue<AVFramePtr> av_frames{}; 69 std::queue<AVFramePtr> av_frames{};
68}; 70};
69 71
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index fea6aed98..5fb6d45ee 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -45,134 +45,129 @@ H264::~H264() = default;
45 45
46const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, 46const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
47 bool is_first_frame) { 47 bool is_first_frame) {
48 H264DecoderContext context{}; 48 H264DecoderContext context;
49 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); 49 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
50 50
51 const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); 51 const s64 frame_number = context.h264_parameter_set.frame_number.Value();
52 if (!is_first_frame && frame_number != 0) { 52 if (!is_first_frame && frame_number != 0) {
53 frame.resize(context.frame_data_size); 53 frame.resize(context.stream_len);
54
55 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); 54 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
56 } else { 55 return frame;
57 /// Encode header 56 }
58 H264BitWriter writer{};
59 writer.WriteU(1, 24);
60 writer.WriteU(0, 1);
61 writer.WriteU(3, 2);
62 writer.WriteU(7, 5);
63 writer.WriteU(100, 8);
64 writer.WriteU(0, 8);
65 writer.WriteU(31, 8);
66 writer.WriteUe(0);
67 const auto chroma_format_idc =
68 static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
69 writer.WriteUe(chroma_format_idc);
70 if (chroma_format_idc == 3) {
71 writer.WriteBit(false);
72 }
73
74 writer.WriteUe(0);
75 writer.WriteUe(0);
76 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
77 writer.WriteBit(false); // Scaling matrix present flag
78
79 const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
80 writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
81 writer.WriteUe(order_cnt_type);
82 if (order_cnt_type == 0) {
83 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
84 } else if (order_cnt_type == 1) {
85 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
86
87 writer.WriteSe(0);
88 writer.WriteSe(0);
89 writer.WriteUe(0);
90 }
91
92 const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
93 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
94 57
95 writer.WriteUe(16); 58 // Encode header
59 H264BitWriter writer{};
60 writer.WriteU(1, 24);
61 writer.WriteU(0, 1);
62 writer.WriteU(3, 2);
63 writer.WriteU(7, 5);
64 writer.WriteU(100, 8);
65 writer.WriteU(0, 8);
66 writer.WriteU(31, 8);
67 writer.WriteUe(0);
68 const u32 chroma_format_idc =
69 static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
70 writer.WriteUe(chroma_format_idc);
71 if (chroma_format_idc == 3) {
96 writer.WriteBit(false); 72 writer.WriteBit(false);
97 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); 73 }
98 writer.WriteUe(pic_height - 1);
99 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
100
101 if (!context.h264_parameter_set.frame_mbs_only_flag) {
102 writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
103 }
104 74
105 writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); 75 writer.WriteUe(0);
106 writer.WriteBit(false); // Frame cropping flag 76 writer.WriteUe(0);
107 writer.WriteBit(false); // VUI parameter present flag 77 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
78 writer.WriteBit(false); // Scaling matrix present flag
108 79
109 writer.End(); 80 writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
110 81
111 // H264 PPS 82 const auto order_cnt_type =
112 writer.WriteU(1, 24); 83 static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
113 writer.WriteU(0, 1); 84 writer.WriteUe(order_cnt_type);
114 writer.WriteU(3, 2); 85 if (order_cnt_type == 0) {
115 writer.WriteU(8, 5); 86 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
87 } else if (order_cnt_type == 1) {
88 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
116 89
90 writer.WriteSe(0);
91 writer.WriteSe(0);
117 writer.WriteUe(0); 92 writer.WriteUe(0);
118 writer.WriteUe(0); 93 }
119 94
120 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); 95 const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
121 writer.WriteBit(false); 96 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
122 writer.WriteUe(0); 97
123 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); 98 writer.WriteUe(16);
124 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); 99 writer.WriteBit(false);
125 writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); 100 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
126 writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); 101 writer.WriteUe(pic_height - 1);
127 s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); 102 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
128 pic_init_qp = (pic_init_qp << 26) >> 26;
129 writer.WriteSe(pic_init_qp);
130 writer.WriteSe(0);
131 s32 chroma_qp_index_offset =
132 static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
133 chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
134 103
135 writer.WriteSe(chroma_qp_index_offset); 104 if (!context.h264_parameter_set.frame_mbs_only_flag) {
136 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); 105 writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
137 writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); 106 }
138 writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
139 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
140 107
108 writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
109 writer.WriteBit(false); // Frame cropping flag
110 writer.WriteBit(false); // VUI parameter present flag
111
112 writer.End();
113
114 // H264 PPS
115 writer.WriteU(1, 24);
116 writer.WriteU(0, 1);
117 writer.WriteU(3, 2);
118 writer.WriteU(8, 5);
119
120 writer.WriteUe(0);
121 writer.WriteUe(0);
122
123 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
124 writer.WriteBit(false);
125 writer.WriteUe(0);
126 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
127 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
128 writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
129 writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
130 s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
131 writer.WriteSe(pic_init_qp);
132 writer.WriteSe(0);
133 s32 chroma_qp_index_offset =
134 static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
135
136 writer.WriteSe(chroma_qp_index_offset);
137 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
138 writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
139 writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
140 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
141
142 writer.WriteBit(true);
143
144 for (s32 index = 0; index < 6; index++) {
141 writer.WriteBit(true); 145 writer.WriteBit(true);
146 std::span<const u8> matrix{context.weight_scale};
147 writer.WriteScalingList(matrix, index * 16, 16);
148 }
142 149
143 for (s32 index = 0; index < 6; index++) { 150 if (context.h264_parameter_set.transform_8x8_mode_flag) {
151 for (s32 index = 0; index < 2; index++) {
144 writer.WriteBit(true); 152 writer.WriteBit(true);
145 const auto matrix_x4 = 153 std::span<const u8> matrix{context.weight_scale_8x8};
146 std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); 154 writer.WriteScalingList(matrix, index * 64, 64);
147 writer.WriteScalingList(matrix_x4, index * 16, 16);
148 }
149
150 if (context.h264_parameter_set.transform_8x8_mode_flag) {
151 for (s32 index = 0; index < 2; index++) {
152 writer.WriteBit(true);
153 const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
154 context.scaling_matrix_8.end());
155
156 writer.WriteScalingList(matrix_x8, index * 64, 64);
157 }
158 } 155 }
156 }
159 157
160 s32 chroma_qp_index_offset2 = 158 s32 chroma_qp_index_offset2 =
161 static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); 159 static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
162 chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
163 160
164 writer.WriteSe(chroma_qp_index_offset2); 161 writer.WriteSe(chroma_qp_index_offset2);
165 162
166 writer.End(); 163 writer.End();
167 164
168 const auto& encoded_header = writer.GetByteArray(); 165 const auto& encoded_header = writer.GetByteArray();
169 frame.resize(encoded_header.size() + context.frame_data_size); 166 frame.resize(encoded_header.size() + context.stream_len);
170 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); 167 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
171 168
172 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, 169 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
173 frame.data() + encoded_header.size(), 170 frame.data() + encoded_header.size(), context.stream_len);
174 context.frame_data_size);
175 }
176 171
177 return frame; 172 return frame;
178} 173}
@@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
202 WriteBits(state ? 1 : 0, 1); 197 WriteBits(state ? 1 : 0, 1);
203} 198}
204 199
205void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { 200void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
206 std::vector<u8> scan(count); 201 std::vector<u8> scan(count);
207 if (count == 16) { 202 if (count == 16) {
208 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); 203 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 0f3a1d9f3..bfe84a472 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -20,7 +20,9 @@
20 20
21#pragma once 21#pragma once
22 22
23#include <span>
23#include <vector> 24#include <vector>
25#include "common/bit_field.h"
24#include "common/common_funcs.h" 26#include "common/common_funcs.h"
25#include "common/common_types.h" 27#include "common/common_types.h"
26#include "video_core/command_classes/nvdec_common.h" 28#include "video_core/command_classes/nvdec_common.h"
@@ -48,7 +50,7 @@ public:
48 50
49 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification 51 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
50 /// Writes the scaling matrices of the sream 52 /// Writes the scaling matrices of the sream
51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); 53 void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
52 54
53 /// Return the bitstream as a vector. 55 /// Return the bitstream as a vector.
54 [[nodiscard]] std::vector<u8>& GetByteArray(); 56 [[nodiscard]] std::vector<u8>& GetByteArray();
@@ -78,40 +80,110 @@ public:
78 const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); 80 const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
79 81
80private: 82private:
83 std::vector<u8> frame;
84 GPU& gpu;
85
81 struct H264ParameterSet { 86 struct H264ParameterSet {
82 u32 log2_max_pic_order_cnt{}; 87 s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
83 u32 delta_pic_order_always_zero_flag{}; 88 s32 delta_pic_order_always_zero_flag; ///< 0x04
84 u32 frame_mbs_only_flag{}; 89 s32 frame_mbs_only_flag; ///< 0x08
85 u32 pic_width_in_mbs{}; 90 u32 pic_width_in_mbs; ///< 0x0C
86 u32 pic_height_in_map_units{}; 91 u32 frame_height_in_map_units; ///< 0x10
87 INSERT_PADDING_WORDS(1); 92 union { ///< 0x14
88 u32 entropy_coding_mode_flag{}; 93 BitField<0, 2, u32> tile_format;
89 u32 bottom_field_pic_order_flag{}; 94 BitField<2, 3, u32> gob_height;
90 u32 num_refidx_l0_default_active{}; 95 };
91 u32 num_refidx_l1_default_active{}; 96 u32 entropy_coding_mode_flag; ///< 0x18
92 u32 deblocking_filter_control_flag{}; 97 s32 pic_order_present_flag; ///< 0x1C
93 u32 redundant_pic_count_flag{}; 98 s32 num_refidx_l0_default_active; ///< 0x20
94 u32 transform_8x8_mode_flag{}; 99 s32 num_refidx_l1_default_active; ///< 0x24
95 INSERT_PADDING_WORDS(9); 100 s32 deblocking_filter_control_present_flag; ///< 0x28
96 u64 flags{}; 101 s32 redundant_pic_cnt_present_flag; ///< 0x2C
97 u32 frame_number{}; 102 u32 transform_8x8_mode_flag; ///< 0x30
98 u32 frame_number2{}; 103 u32 pitch_luma; ///< 0x34
104 u32 pitch_chroma; ///< 0x38
105 u32 luma_top_offset; ///< 0x3C
106 u32 luma_bot_offset; ///< 0x40
107 u32 luma_frame_offset; ///< 0x44
108 u32 chroma_top_offset; ///< 0x48
109 u32 chroma_bot_offset; ///< 0x4C
110 u32 chroma_frame_offset; ///< 0x50
111 u32 hist_buffer_size; ///< 0x54
112 union { ///< 0x58
113 union {
114 BitField<0, 1, u64> mbaff_frame;
115 BitField<1, 1, u64> direct_8x8_inference;
116 BitField<2, 1, u64> weighted_pred;
117 BitField<3, 1, u64> constrained_intra_pred;
118 BitField<4, 1, u64> ref_pic;
119 BitField<5, 1, u64> field_pic;
120 BitField<6, 1, u64> bottom_field;
121 BitField<7, 1, u64> second_field;
122 } flags;
123 BitField<8, 4, u64> log2_max_frame_num_minus4;
124 BitField<12, 2, u64> chroma_format_idc;
125 BitField<14, 2, u64> pic_order_cnt_type;
126 BitField<16, 6, s64> pic_init_qp_minus26;
127 BitField<22, 5, s64> chroma_qp_index_offset;
128 BitField<27, 5, s64> second_chroma_qp_index_offset;
129 BitField<32, 2, u64> weighted_bipred_idc;
130 BitField<34, 7, u64> curr_pic_idx;
131 BitField<41, 5, u64> curr_col_idx;
132 BitField<46, 16, u64> frame_number;
133 BitField<62, 1, u64> frame_surfaces;
134 BitField<63, 1, u64> output_memory_layout;
135 };
99 }; 136 };
100 static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); 137 static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
101 138
102 struct H264DecoderContext { 139 struct H264DecoderContext {
103 INSERT_PADDING_BYTES(0x48); 140 INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
104 u32 frame_data_size{}; 141 u32 stream_len; ///< 0x0048
105 INSERT_PADDING_BYTES(0xc); 142 INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
106 H264ParameterSet h264_parameter_set{}; 143 H264ParameterSet h264_parameter_set; ///< 0x0058
107 INSERT_PADDING_BYTES(0x100); 144 INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
108 std::array<u8, 0x60> scaling_matrix_4; 145 std::array<u8, 0x60> weight_scale; ///< 0x01C0
109 std::array<u8, 0x80> scaling_matrix_8; 146 std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
110 }; 147 };
111 static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); 148 static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
112 149
113 std::vector<u8> frame; 150#define ASSERT_POSITION(field_name, position) \
114 GPU& gpu; 151 static_assert(offsetof(H264ParameterSet, field_name) == position, \
152 "Field " #field_name " has invalid position")
153
154 ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
155 ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
156 ASSERT_POSITION(frame_mbs_only_flag, 0x08);
157 ASSERT_POSITION(pic_width_in_mbs, 0x0C);
158 ASSERT_POSITION(frame_height_in_map_units, 0x10);
159 ASSERT_POSITION(tile_format, 0x14);
160 ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
161 ASSERT_POSITION(pic_order_present_flag, 0x1C);
162 ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
163 ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
164 ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
165 ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
166 ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
167 ASSERT_POSITION(pitch_luma, 0x34);
168 ASSERT_POSITION(pitch_chroma, 0x38);
169 ASSERT_POSITION(luma_top_offset, 0x3C);
170 ASSERT_POSITION(luma_bot_offset, 0x40);
171 ASSERT_POSITION(luma_frame_offset, 0x44);
172 ASSERT_POSITION(chroma_top_offset, 0x48);
173 ASSERT_POSITION(chroma_bot_offset, 0x4C);
174 ASSERT_POSITION(chroma_frame_offset, 0x50);
175 ASSERT_POSITION(hist_buffer_size, 0x54);
176 ASSERT_POSITION(flags, 0x58);
177#undef ASSERT_POSITION
178
179#define ASSERT_POSITION(field_name, position) \
180 static_assert(offsetof(H264DecoderContext, field_name) == position, \
181 "Field " #field_name " has invalid position")
182
183 ASSERT_POSITION(stream_len, 0x48);
184 ASSERT_POSITION(h264_parameter_set, 0x58);
185 ASSERT_POSITION(weight_scale, 0x1C0);
186#undef ASSERT_POSITION
115}; 187};
116 188
117} // namespace Decoder 189} // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 29bb31418..902bc2a98 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
354} 354}
355 355
356Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { 356Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
357 PictureInfo picture_info{}; 357 PictureInfo picture_info;
358 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); 358 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
359 Vp9PictureInfo vp9_info = picture_info.Convert(); 359 Vp9PictureInfo vp9_info = picture_info.Convert();
360 360
@@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
370} 370}
371 371
372void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { 372void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
373 EntropyProbs entropy{}; 373 EntropyProbs entropy;
374 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); 374 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
375 entropy.Convert(dst); 375 entropy.Convert(dst);
376} 376}
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 139501a1c..2da14f3ca 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -15,10 +15,10 @@ class GPU;
15 15
16namespace Decoder { 16namespace Decoder {
17struct Vp9FrameDimensions { 17struct Vp9FrameDimensions {
18 s16 width{}; 18 s16 width;
19 s16 height{}; 19 s16 height;
20 s16 luma_pitch{}; 20 s16 luma_pitch;
21 s16 chroma_pitch{}; 21 s16 chroma_pitch;
22}; 22};
23static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); 23static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
24 24
@@ -49,87 +49,87 @@ enum class TxMode {
49}; 49};
50 50
51struct Segmentation { 51struct Segmentation {
52 u8 enabled{}; 52 u8 enabled;
53 u8 update_map{}; 53 u8 update_map;
54 u8 temporal_update{}; 54 u8 temporal_update;
55 u8 abs_delta{}; 55 u8 abs_delta;
56 std::array<u32, 8> feature_mask{}; 56 std::array<u32, 8> feature_mask;
57 std::array<std::array<s16, 4>, 8> feature_data{}; 57 std::array<std::array<s16, 4>, 8> feature_data;
58}; 58};
59static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); 59static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
60 60
61struct LoopFilter { 61struct LoopFilter {
62 u8 mode_ref_delta_enabled{}; 62 u8 mode_ref_delta_enabled;
63 std::array<s8, 4> ref_deltas{}; 63 std::array<s8, 4> ref_deltas;
64 std::array<s8, 2> mode_deltas{}; 64 std::array<s8, 2> mode_deltas;
65}; 65};
66static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); 66static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
67 67
68struct Vp9EntropyProbs { 68struct Vp9EntropyProbs {
69 std::array<u8, 36> y_mode_prob{}; 69 std::array<u8, 36> y_mode_prob; ///< 0x0000
70 std::array<u8, 64> partition_prob{}; 70 std::array<u8, 64> partition_prob; ///< 0x0024
71 std::array<u8, 1728> coef_probs{}; 71 std::array<u8, 1728> coef_probs; ///< 0x0064
72 std::array<u8, 8> switchable_interp_prob{}; 72 std::array<u8, 8> switchable_interp_prob; ///< 0x0724
73 std::array<u8, 28> inter_mode_prob{}; 73 std::array<u8, 28> inter_mode_prob; ///< 0x072C
74 std::array<u8, 4> intra_inter_prob{}; 74 std::array<u8, 4> intra_inter_prob; ///< 0x0748
75 std::array<u8, 5> comp_inter_prob{}; 75 std::array<u8, 5> comp_inter_prob; ///< 0x074C
76 std::array<u8, 10> single_ref_prob{}; 76 std::array<u8, 10> single_ref_prob; ///< 0x0751
77 std::array<u8, 5> comp_ref_prob{}; 77 std::array<u8, 5> comp_ref_prob; ///< 0x075B
78 std::array<u8, 6> tx_32x32_prob{}; 78 std::array<u8, 6> tx_32x32_prob; ///< 0x0760
79 std::array<u8, 4> tx_16x16_prob{}; 79 std::array<u8, 4> tx_16x16_prob; ///< 0x0766
80 std::array<u8, 2> tx_8x8_prob{}; 80 std::array<u8, 2> tx_8x8_prob; ///< 0x076A
81 std::array<u8, 3> skip_probs{}; 81 std::array<u8, 3> skip_probs; ///< 0x076C
82 std::array<u8, 3> joints{}; 82 std::array<u8, 3> joints; ///< 0x076F
83 std::array<u8, 2> sign{}; 83 std::array<u8, 2> sign; ///< 0x0772
84 std::array<u8, 20> classes{}; 84 std::array<u8, 20> classes; ///< 0x0774
85 std::array<u8, 2> class_0{}; 85 std::array<u8, 2> class_0; ///< 0x0788
86 std::array<u8, 20> prob_bits{}; 86 std::array<u8, 20> prob_bits; ///< 0x078A
87 std::array<u8, 12> class_0_fr{}; 87 std::array<u8, 12> class_0_fr; ///< 0x079E
88 std::array<u8, 6> fr{}; 88 std::array<u8, 6> fr; ///< 0x07AA
89 std::array<u8, 2> class_0_hp{}; 89 std::array<u8, 2> class_0_hp; ///< 0x07B0
90 std::array<u8, 2> high_precision{}; 90 std::array<u8, 2> high_precision; ///< 0x07B2
91}; 91};
92static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); 92static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
93 93
94struct Vp9PictureInfo { 94struct Vp9PictureInfo {
95 bool is_key_frame{}; 95 bool is_key_frame;
96 bool intra_only{}; 96 bool intra_only;
97 bool last_frame_was_key{}; 97 bool last_frame_was_key;
98 bool frame_size_changed{}; 98 bool frame_size_changed;
99 bool error_resilient_mode{}; 99 bool error_resilient_mode;
100 bool last_frame_shown{}; 100 bool last_frame_shown;
101 bool show_frame{}; 101 bool show_frame;
102 std::array<s8, 4> ref_frame_sign_bias{}; 102 std::array<s8, 4> ref_frame_sign_bias;
103 s32 base_q_index{}; 103 s32 base_q_index;
104 s32 y_dc_delta_q{}; 104 s32 y_dc_delta_q;
105 s32 uv_dc_delta_q{}; 105 s32 uv_dc_delta_q;
106 s32 uv_ac_delta_q{}; 106 s32 uv_ac_delta_q;
107 bool lossless{}; 107 bool lossless;
108 s32 transform_mode{}; 108 s32 transform_mode;
109 bool allow_high_precision_mv{}; 109 bool allow_high_precision_mv;
110 s32 interp_filter{}; 110 s32 interp_filter;
111 s32 reference_mode{}; 111 s32 reference_mode;
112 s8 comp_fixed_ref{}; 112 s8 comp_fixed_ref;
113 std::array<s8, 2> comp_var_ref{}; 113 std::array<s8, 2> comp_var_ref;
114 s32 log2_tile_cols{}; 114 s32 log2_tile_cols;
115 s32 log2_tile_rows{}; 115 s32 log2_tile_rows;
116 bool segment_enabled{}; 116 bool segment_enabled;
117 bool segment_map_update{}; 117 bool segment_map_update;
118 bool segment_map_temporal_update{}; 118 bool segment_map_temporal_update;
119 s32 segment_abs_delta{}; 119 s32 segment_abs_delta;
120 std::array<u32, 8> segment_feature_enable{}; 120 std::array<u32, 8> segment_feature_enable;
121 std::array<std::array<s16, 4>, 8> segment_feature_data{}; 121 std::array<std::array<s16, 4>, 8> segment_feature_data;
122 bool mode_ref_delta_enabled{}; 122 bool mode_ref_delta_enabled;
123 bool use_prev_in_find_mv_refs{}; 123 bool use_prev_in_find_mv_refs;
124 std::array<s8, 4> ref_deltas{}; 124 std::array<s8, 4> ref_deltas;
125 std::array<s8, 2> mode_deltas{}; 125 std::array<s8, 2> mode_deltas;
126 Vp9EntropyProbs entropy{}; 126 Vp9EntropyProbs entropy;
127 Vp9FrameDimensions frame_size{}; 127 Vp9FrameDimensions frame_size;
128 u8 first_level{}; 128 u8 first_level;
129 u8 sharpness_level{}; 129 u8 sharpness_level;
130 u32 bitstream_size{}; 130 u32 bitstream_size;
131 std::array<u64, 4> frame_offsets{}; 131 std::array<u64, 4> frame_offsets;
132 std::array<bool, 4> refresh_frame{}; 132 std::array<bool, 4> refresh_frame;
133}; 133};
134 134
135struct Vp9FrameContainer { 135struct Vp9FrameContainer {
@@ -138,35 +138,35 @@ struct Vp9FrameContainer {
138}; 138};
139 139
140struct PictureInfo { 140struct PictureInfo {
141 INSERT_PADDING_WORDS(12); 141 INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
142 u32 bitstream_size{}; 142 u32 bitstream_size; ///< 0x30
143 INSERT_PADDING_WORDS(5); 143 INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
144 Vp9FrameDimensions last_frame_size{}; 144 Vp9FrameDimensions last_frame_size; ///< 0x48
145 Vp9FrameDimensions golden_frame_size{}; 145 Vp9FrameDimensions golden_frame_size; ///< 0x50
146 Vp9FrameDimensions alt_frame_size{}; 146 Vp9FrameDimensions alt_frame_size; ///< 0x58
147 Vp9FrameDimensions current_frame_size{}; 147 Vp9FrameDimensions current_frame_size; ///< 0x60
148 u32 vp9_flags{}; 148 u32 vp9_flags; ///< 0x68
149 std::array<s8, 4> ref_frame_sign_bias{}; 149 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
150 u8 first_level{}; 150 u8 first_level; ///< 0x70
151 u8 sharpness_level{}; 151 u8 sharpness_level; ///< 0x71
152 u8 base_q_index{}; 152 u8 base_q_index; ///< 0x72
153 u8 y_dc_delta_q{}; 153 u8 y_dc_delta_q; ///< 0x73
154 u8 uv_ac_delta_q{}; 154 u8 uv_ac_delta_q; ///< 0x74
155 u8 uv_dc_delta_q{}; 155 u8 uv_dc_delta_q; ///< 0x75
156 u8 lossless{}; 156 u8 lossless; ///< 0x76
157 u8 tx_mode{}; 157 u8 tx_mode; ///< 0x77
158 u8 allow_high_precision_mv{}; 158 u8 allow_high_precision_mv; ///< 0x78
159 u8 interp_filter{}; 159 u8 interp_filter; ///< 0x79
160 u8 reference_mode{}; 160 u8 reference_mode; ///< 0x7A
161 s8 comp_fixed_ref{}; 161 s8 comp_fixed_ref; ///< 0x7B
162 std::array<s8, 2> comp_var_ref{}; 162 std::array<s8, 2> comp_var_ref; ///< 0x7C
163 u8 log2_tile_cols{}; 163 u8 log2_tile_cols; ///< 0x7E
164 u8 log2_tile_rows{}; 164 u8 log2_tile_rows; ///< 0x7F
165 Segmentation segmentation{}; 165 Segmentation segmentation; ///< 0x80
166 LoopFilter loop_filter{}; 166 LoopFilter loop_filter; ///< 0xE4
167 INSERT_PADDING_BYTES(5); 167 INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
168 u32 surface_params{}; 168 u32 surface_params; ///< 0xF0
169 INSERT_PADDING_WORDS(3); 169 INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
170 170
171 [[nodiscard]] Vp9PictureInfo Convert() const { 171 [[nodiscard]] Vp9PictureInfo Convert() const {
172 return { 172 return {
@@ -176,6 +176,7 @@ struct PictureInfo {
176 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, 176 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
177 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, 177 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
178 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, 178 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
179 .show_frame = false,
179 .ref_frame_sign_bias = ref_frame_sign_bias, 180 .ref_frame_sign_bias = ref_frame_sign_bias,
180 .base_q_index = base_q_index, 181 .base_q_index = base_q_index,
181 .y_dc_delta_q = y_dc_delta_q, 182 .y_dc_delta_q = y_dc_delta_q,
@@ -204,45 +205,48 @@ struct PictureInfo {
204 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), 205 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
205 .ref_deltas = loop_filter.ref_deltas, 206 .ref_deltas = loop_filter.ref_deltas,
206 .mode_deltas = loop_filter.mode_deltas, 207 .mode_deltas = loop_filter.mode_deltas,
208 .entropy{},
207 .frame_size = current_frame_size, 209 .frame_size = current_frame_size,
208 .first_level = first_level, 210 .first_level = first_level,
209 .sharpness_level = sharpness_level, 211 .sharpness_level = sharpness_level,
210 .bitstream_size = bitstream_size, 212 .bitstream_size = bitstream_size,
213 .frame_offsets{},
214 .refresh_frame{},
211 }; 215 };
212 } 216 }
213}; 217};
214static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); 218static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
215 219
216struct EntropyProbs { 220struct EntropyProbs {
217 INSERT_PADDING_BYTES(1024); 221 INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
218 std::array<u8, 28> inter_mode_prob{}; 222 std::array<u8, 28> inter_mode_prob; ///< 0x0400
219 std::array<u8, 4> intra_inter_prob{}; 223 std::array<u8, 4> intra_inter_prob; ///< 0x041C
220 INSERT_PADDING_BYTES(80); 224 INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
221 std::array<u8, 2> tx_8x8_prob{}; 225 std::array<u8, 2> tx_8x8_prob; ///< 0x0470
222 std::array<u8, 4> tx_16x16_prob{}; 226 std::array<u8, 4> tx_16x16_prob; ///< 0x0472
223 std::array<u8, 6> tx_32x32_prob{}; 227 std::array<u8, 6> tx_32x32_prob; ///< 0x0476
224 std::array<u8, 4> y_mode_prob_e8{}; 228 std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
225 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; 229 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
226 INSERT_PADDING_BYTES(64); 230 INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
227 std::array<u8, 64> partition_prob{}; 231 std::array<u8, 64> partition_prob; ///< 0x04E0
228 INSERT_PADDING_BYTES(10); 232 INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
229 std::array<u8, 8> switchable_interp_prob{}; 233 std::array<u8, 8> switchable_interp_prob; ///< 0x052A
230 std::array<u8, 5> comp_inter_prob{}; 234 std::array<u8, 5> comp_inter_prob; ///< 0x0532
231 std::array<u8, 3> skip_probs{}; 235 std::array<u8, 3> skip_probs; ///< 0x0537
232 INSERT_PADDING_BYTES(1); 236 INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
233 std::array<u8, 3> joints{}; 237 std::array<u8, 3> joints; ///< 0x053B
234 std::array<u8, 2> sign{}; 238 std::array<u8, 2> sign; ///< 0x053E
235 std::array<u8, 2> class_0{}; 239 std::array<u8, 2> class_0; ///< 0x0540
236 std::array<u8, 6> fr{}; 240 std::array<u8, 6> fr; ///< 0x0542
237 std::array<u8, 2> class_0_hp{}; 241 std::array<u8, 2> class_0_hp; ///< 0x0548
238 std::array<u8, 2> high_precision{}; 242 std::array<u8, 2> high_precision; ///< 0x054A
239 std::array<u8, 20> classes{}; 243 std::array<u8, 20> classes; ///< 0x054C
240 std::array<u8, 12> class_0_fr{}; 244 std::array<u8, 12> class_0_fr; ///< 0x0560
241 std::array<u8, 20> pred_bits{}; 245 std::array<u8, 20> pred_bits; ///< 0x056C
242 std::array<u8, 10> single_ref_prob{}; 246 std::array<u8, 10> single_ref_prob; ///< 0x0580
243 std::array<u8, 5> comp_ref_prob{}; 247 std::array<u8, 5> comp_ref_prob; ///< 0x058A
244 INSERT_PADDING_BYTES(17); 248 INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
245 std::array<u8, 2304> coef_probs{}; 249 std::array<u8, 2304> coef_probs; ///< 0x05A0
246 250
247 void Convert(Vp9EntropyProbs& fc) { 251 void Convert(Vp9EntropyProbs& fc) {
248 fc.inter_mode_prob = inter_mode_prob; 252 fc.inter_mode_prob = inter_mode_prob;
@@ -293,10 +297,45 @@ struct RefPoolElement {
293}; 297};
294 298
295struct FrameContexts { 299struct FrameContexts {
296 s64 from{}; 300 s64 from;
297 bool adapted{}; 301 bool adapted;
298 Vp9EntropyProbs probs{}; 302 Vp9EntropyProbs probs;
299}; 303};
300 304
305#define ASSERT_POSITION(field_name, position) \
306 static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
307 "Field " #field_name " has invalid position")
308
309ASSERT_POSITION(partition_prob, 0x0024);
310ASSERT_POSITION(switchable_interp_prob, 0x0724);
311ASSERT_POSITION(sign, 0x0772);
312ASSERT_POSITION(class_0_fr, 0x079E);
313ASSERT_POSITION(high_precision, 0x07B2);
314#undef ASSERT_POSITION
315
316#define ASSERT_POSITION(field_name, position) \
317 static_assert(offsetof(PictureInfo, field_name) == position, \
318 "Field " #field_name " has invalid position")
319
320ASSERT_POSITION(bitstream_size, 0x30);
321ASSERT_POSITION(last_frame_size, 0x48);
322ASSERT_POSITION(first_level, 0x70);
323ASSERT_POSITION(segmentation, 0x80);
324ASSERT_POSITION(loop_filter, 0xE4);
325ASSERT_POSITION(surface_params, 0xF0);
326#undef ASSERT_POSITION
327
328#define ASSERT_POSITION(field_name, position) \
329 static_assert(offsetof(EntropyProbs, field_name) == position, \
330 "Field " #field_name " has invalid position")
331
332ASSERT_POSITION(inter_mode_prob, 0x400);
333ASSERT_POSITION(tx_8x8_prob, 0x470);
334ASSERT_POSITION(partition_prob, 0x4E0);
335ASSERT_POSITION(class_0, 0x540);
336ASSERT_POSITION(class_0_fr, 0x560);
337ASSERT_POSITION(coef_probs, 0x5A0);
338#undef ASSERT_POSITION
339
301}; // namespace Decoder 340}; // namespace Decoder
302}; // namespace Tegra 341}; // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index e4f919afd..b5e3b70fc 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -8,22 +8,21 @@
8 8
9namespace Tegra { 9namespace Tegra {
10 10
11Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} 11#define NVDEC_REG_INDEX(field_name) \
12 (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
13
14Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
12 15
13Nvdec::~Nvdec() = default; 16Nvdec::~Nvdec() = default;
14 17
15void Nvdec::ProcessMethod(Method method, u32 argument) { 18void Nvdec::ProcessMethod(u32 method, u32 argument) {
16 if (method == Method::SetVideoCodec) { 19 state.reg_array[method] = static_cast<u64>(argument) << 8;
17 codec->StateWrite(static_cast<u32>(method), argument);
18 } else {
19 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
20 }
21 20
22 switch (method) { 21 switch (method) {
23 case Method::SetVideoCodec: 22 case NVDEC_REG_INDEX(set_codec_id):
24 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); 23 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
25 break; 24 break;
26 case Method::Execute: 25 case NVDEC_REG_INDEX(execute):
27 Execute(); 26 Execute();
28 break; 27 break;
29 } 28 }
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e66be80b8..6e1da0b04 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -14,16 +14,11 @@ class GPU;
14 14
15class Nvdec { 15class Nvdec {
16public: 16public:
17 enum class Method : u32 {
18 SetVideoCodec = 0x80,
19 Execute = 0xc0,
20 };
21
22 explicit Nvdec(GPU& gpu); 17 explicit Nvdec(GPU& gpu);
23 ~Nvdec(); 18 ~Nvdec();
24 19
25 /// Writes the method into the state, Invoke Execute() if encountered 20 /// Writes the method into the state, Invoke Execute() if encountered
26 void ProcessMethod(Method method, u32 argument); 21 void ProcessMethod(u32 method, u32 argument);
27 22
28 /// Return most recently decoded frame 23 /// Return most recently decoded frame
29 [[nodiscard]] AVFramePtr GetFrame(); 24 [[nodiscard]] AVFramePtr GetFrame();
@@ -33,6 +28,7 @@ private:
33 void Execute(); 28 void Execute();
34 29
35 GPU& gpu; 30 GPU& gpu;
31 NvdecCommon::NvdecRegisters state;
36 std::unique_ptr<Codec> codec; 32 std::unique_ptr<Codec> codec;
37}; 33};
38} // namespace Tegra 34} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
index 01b5e086d..6a24e00a0 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -4,40 +4,13 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
7#include "common/common_funcs.h" 8#include "common/common_funcs.h"
8#include "common/common_types.h" 9#include "common/common_types.h"
9 10
10namespace Tegra::NvdecCommon { 11namespace Tegra::NvdecCommon {
11 12
12struct NvdecRegisters { 13enum class VideoCodec : u64 {
13 INSERT_PADDING_WORDS(256);
14 u64 set_codec_id{};
15 INSERT_PADDING_WORDS(254);
16 u64 set_platform_id{};
17 u64 picture_info_offset{};
18 u64 frame_bitstream_offset{};
19 u64 frame_number{};
20 u64 h264_slice_data_offsets{};
21 u64 h264_mv_dump_offset{};
22 INSERT_PADDING_WORDS(6);
23 u64 frame_stats_offset{};
24 u64 h264_last_surface_luma_offset{};
25 u64 h264_last_surface_chroma_offset{};
26 std::array<u64, 17> surface_luma_offset{};
27 std::array<u64, 17> surface_chroma_offset{};
28 INSERT_PADDING_WORDS(132);
29 u64 vp9_entropy_probs_offset{};
30 u64 vp9_backward_updates_offset{};
31 u64 vp9_last_frame_segmap_offset{};
32 u64 vp9_curr_frame_segmap_offset{};
33 INSERT_PADDING_WORDS(2);
34 u64 vp9_last_frame_mvs_offset{};
35 u64 vp9_curr_frame_mvs_offset{};
36 INSERT_PADDING_WORDS(2);
37};
38static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
39
40enum class VideoCodec : u32 {
41 None = 0x0, 14 None = 0x0,
42 H264 = 0x3, 15 H264 = 0x3,
43 Vp8 = 0x5, 16 Vp8 = 0x5,
@@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
45 Vp9 = 0x9, 18 Vp9 = 0x9,
46}; 19};
47 20
21// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
22// doubling the sizes here is compensating for that.
23struct NvdecRegisters {
24 static constexpr std::size_t NUM_REGS = 0x178;
25
26 union {
27 struct {
28 INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
29 VideoCodec set_codec_id; ///< 0x0400
30 INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
31 u64 execute; ///< 0x0600
32 INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
33 struct { ///< 0x0800
34 union {
35 BitField<0, 3, VideoCodec> codec;
36 BitField<4, 1, u64> gp_timer_on;
37 BitField<13, 1, u64> mb_timer_on;
38 BitField<14, 1, u64> intra_frame_pslc;
39 BitField<17, 1, u64> all_intra_frame;
40 };
41 } control_params;
42 u64 picture_info_offset; ///< 0x0808
43 u64 frame_bitstream_offset; ///< 0x0810
44 u64 frame_number; ///< 0x0818
45 u64 h264_slice_data_offsets; ///< 0x0820
46 u64 h264_mv_dump_offset; ///< 0x0828
47 INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
48 u64 frame_stats_offset; ///< 0x0848
49 u64 h264_last_surface_luma_offset; ///< 0x0850
50 u64 h264_last_surface_chroma_offset; ///< 0x0858
51 std::array<u64, 17> surface_luma_offset; ///< 0x0860
52 std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
53 INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
54 u64 vp9_entropy_probs_offset; ///< 0x0B80
55 u64 vp9_backward_updates_offset; ///< 0x0B88
56 u64 vp9_last_frame_segmap_offset; ///< 0x0B90
57 u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
58 INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
59 u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
60 u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
61 INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
62 };
63 std::array<u64, NUM_REGS> reg_array;
64 };
65};
66static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
67
68#define ASSERT_REG_POSITION(field_name, position) \
69 static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \
70 "Field " #field_name " has invalid position")
71
72ASSERT_REG_POSITION(set_codec_id, 0x80);
73ASSERT_REG_POSITION(execute, 0xC0);
74ASSERT_REG_POSITION(control_params, 0x100);
75ASSERT_REG_POSITION(picture_info_offset, 0x101);
76ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
77ASSERT_REG_POSITION(frame_number, 0x103);
78ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
79ASSERT_REG_POSITION(frame_stats_offset, 0x109);
80ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
81ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
82ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
83ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
84ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
85ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
86ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
87ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
88ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
89ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
90
91#undef ASSERT_REG_POSITION
92
48} // namespace Tegra::NvdecCommon 93} // namespace Tegra::NvdecCommon
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0a8b82f2b..ff3db0aee 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -3,7 +3,21 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array> 5#include <array>
6
7extern "C" {
8#if defined(__GNUC__) || defined(__clang__)
9#pragma GCC diagnostic push
10#pragma GCC diagnostic ignored "-Wconversion"
11#endif
12#include <libswscale/swscale.h>
13#if defined(__GNUC__) || defined(__clang__)
14#pragma GCC diagnostic pop
15#endif
16}
17
6#include "common/assert.h" 18#include "common/assert.h"
19#include "common/logging/log.h"
20
7#include "video_core/command_classes/nvdec.h" 21#include "video_core/command_classes/nvdec.h"
8#include "video_core/command_classes/vic.h" 22#include "video_core/command_classes/vic.h"
9#include "video_core/engines/maxwell_3d.h" 23#include "video_core/engines/maxwell_3d.h"
@@ -11,10 +25,6 @@
11#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
12#include "video_core/textures/decoders.h" 26#include "video_core/textures/decoders.h"
13 27
14extern "C" {
15#include <libswscale/swscale.h>
16}
17
18namespace Tegra { 28namespace Tegra {
19 29
20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) 30Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0f640fdae..f26530ede 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -7,6 +7,10 @@
7#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10#include "video_core/surface.h"
11
12using VideoCore::Surface::BytesPerBlock;
13using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
10 14
11namespace Tegra::Engines { 15namespace Tegra::Engines {
12 16
@@ -49,7 +53,7 @@ void Fermi2D::Blit() {
49 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); 53 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
50 54
51 const auto& args = regs.pixels_from_memory; 55 const auto& args = regs.pixels_from_memory;
52 const Config config{ 56 Config config{
53 .operation = regs.operation, 57 .operation = regs.operation,
54 .filter = args.sample_mode.filter, 58 .filter = args.sample_mode.filter,
55 .dst_x0 = args.dst_x0, 59 .dst_x0 = args.dst_x0,
@@ -61,7 +65,21 @@ void Fermi2D::Blit() {
61 .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), 65 .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
62 .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), 66 .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
63 }; 67 };
64 if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { 68 Surface src = regs.src;
69 const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
70 const auto need_align_to_pitch =
71 src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
72 static_cast<s32>(src.width) == config.src_x1 &&
73 config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
74 if (need_align_to_pitch) {
75 auto address = src.Address() + config.src_x0 * bytes_per_pixel;
76 src.addr_upper = static_cast<u32>(address >> 32);
77 src.addr_lower = static_cast<u32>(address);
78 src.width -= config.src_x0;
79 config.src_x1 -= config.src_x0;
80 config.src_x0 = 0;
81 }
82 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
65 UNIMPLEMENTED(); 83 UNIMPLEMENTED();
66 } 84 }
67} 85}
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2208e1922..c9cff7450 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,7 +18,10 @@ set(SHADER_FILES
18 vulkan_uint8.comp 18 vulkan_uint8.comp
19) 19)
20 20
21find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) 21find_program(GLSLANGVALIDATOR "glslangValidator")
22if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND")
23 message(FATAL_ERROR "Required program `glslangValidator` not found.")
24endif()
22 25
23set(GLSL_FLAGS "") 26set(GLSL_FLAGS "")
24set(QUIET_FLAG "--quiet") 27set(QUIET_FLAG "--quiet")
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..d2b9d5f2b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
69 } else { 69 } else {
70 UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); 70 UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
71 } 71 }
72 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
73 const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
74 ASSERT(cpu_addr);
75 72
76 rasterizer->UnmapMemory(*cpu_addr, size); 73 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
74
75 for (const auto& map : submapped_ranges) {
76 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
77 const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
78 ASSERT(cpu_addr);
79
80 rasterizer->UnmapMemory(*cpu_addr, map.second);
81 }
77 82
78 UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); 83 UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
79} 84}
@@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
127 132
128 //// Lock the new page 133 //// Lock the new page
129 // TryLockPage(page_entry, size); 134 // TryLockPage(page_entry, size);
135 auto& current_page = page_table[PageEntryIndex(gpu_addr)];
130 136
131 page_table[PageEntryIndex(gpu_addr)] = page_entry; 137 if ((!current_page.IsValid() && page_entry.IsValid()) ||
138 current_page.ToAddress() != page_entry.ToAddress()) {
139 rasterizer->ModifyGPUMemory(gpu_addr, size);
140 }
141
142 current_page = page_entry;
132} 143}
133 144
134std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, 145std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
174 return page_entry.ToAddress() + (gpu_addr & page_mask); 185 return page_entry.ToAddress() + (gpu_addr & page_mask);
175} 186}
176 187
188std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
189 size_t page_index{addr >> page_bits};
190 const size_t page_last{(addr + size + page_size - 1) >> page_bits};
191 while (page_index < page_last) {
192 const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
193 if (page_addr && *page_addr != 0) {
194 return page_addr;
195 }
196 ++page_index;
197 }
198 return std::nullopt;
199}
200
177template <typename T> 201template <typename T>
178T MemoryManager::Read(GPUVAddr addr) const { 202T MemoryManager::Read(GPUVAddr addr) const {
179 if (auto page_pointer{GetPointer(addr)}; page_pointer) { 203 if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
370 return page <= Core::Memory::PAGE_SIZE; 394 return page <= Core::Memory::PAGE_SIZE;
371} 395}
372 396
397bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
398 size_t page_index{gpu_addr >> page_bits};
399 const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
400 std::optional<VAddr> old_page_addr{};
401 while (page_index != page_last) {
402 const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
403 if (!page_addr || *page_addr == 0) {
404 return false;
405 }
406 if (old_page_addr) {
407 if (*old_page_addr + page_size != *page_addr) {
408 return false;
409 }
410 }
411 old_page_addr = page_addr;
412 ++page_index;
413 }
414 return true;
415}
416
417bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
418 size_t page_index{gpu_addr >> page_bits};
419 const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
420 while (page_index < page_last) {
421 if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
422 return false;
423 }
424 ++page_index;
425 }
426 return true;
427}
428
429std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
430 GPUVAddr gpu_addr, std::size_t size) const {
431 std::vector<std::pair<GPUVAddr, std::size_t>> result{};
432 size_t page_index{gpu_addr >> page_bits};
433 size_t remaining_size{size};
434 size_t page_offset{gpu_addr & page_mask};
435 std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
436 std::optional<VAddr> old_page_addr{};
437 const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
438 if (!last_segment) {
439 GPUVAddr new_base_addr = page_index << page_bits;
440 last_segment = {new_base_addr, bytes};
441 } else {
442 last_segment->second += bytes;
443 }
444 };
445 const auto split = [this, &last_segment, &result] {
446 if (last_segment) {
447 result.push_back(*last_segment);
448 last_segment = std::nullopt;
449 }
450 };
451 while (remaining_size > 0) {
452 const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
453 const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
454 if (!page_addr) {
455 split();
456 } else if (old_page_addr) {
457 if (*old_page_addr + page_size != *page_addr) {
458 split();
459 }
460 extend_size(num_bytes);
461 } else {
462 extend_size(num_bytes);
463 }
464 ++page_index;
465 page_offset = 0;
466 remaining_size -= num_bytes;
467 }
468 split();
469 return result;
470}
471
373} // namespace Tegra 472} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
76 76
77 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; 77 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
78 78
79 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
80
79 template <typename T> 81 template <typename T>
80 [[nodiscard]] T Read(GPUVAddr addr) const; 82 [[nodiscard]] T Read(GPUVAddr addr) const;
81 83
@@ -112,10 +114,28 @@ public:
112 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); 114 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
113 115
114 /** 116 /**
115 * IsGranularRange checks if a gpu region can be simply read with a pointer. 117 * Checks if a gpu region can be simply read with a pointer.
116 */ 118 */
117 [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; 119 [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
118 120
121 /**
122 * Checks if a gpu region is mapped by a single range of cpu addresses.
123 */
124 [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
125
126 /**
127 * Checks if a gpu region is mapped entirely.
128 */
129 [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
130
131 /**
132 * Returns a vector with all the subranges of cpu addresses mapped beneath.
133 * If the region is continuous, a single pair will be returned. If it's unmapped, an empty vector
134 * will be returned.
135 */
136 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
137 std::size_t size) const;
138
119 [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); 139 [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
120 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); 140 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
121 [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); 141 [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..0cec4225b 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
87 /// Unmap memory range 87 /// Unmap memory range
88 virtual void UnmapMemory(VAddr addr, u64 size) = 0; 88 virtual void UnmapMemory(VAddr addr, u64 size) = 0;
89 89
90 /// Remap GPU memory range. This means the underlying backing memory changed.
91 virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
92
90 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 93 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
91 /// and invalidated 94 /// and invalidated
92 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 95 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 320ee8d30..63d8ad42a 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -42,6 +42,8 @@ public:
42 42
43 [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; 43 [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
44 44
45 [[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
46
45 // Getter/setter functions: 47 // Getter/setter functions:
46 // ------------------------ 48 // ------------------------
47 49
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3f4532ca7..3b00614e7 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -202,13 +202,13 @@ Device::Device() {
202 LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); 202 LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
203 throw std::runtime_error{"Insufficient version"}; 203 throw std::runtime_error{"Insufficient version"};
204 } 204 }
205 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 205 vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
206 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); 206 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
207 const std::vector extensions = GetExtensions(); 207 const std::vector extensions = GetExtensions();
208 208
209 const bool is_nvidia = vendor == "NVIDIA Corporation"; 209 const bool is_nvidia = vendor_name == "NVIDIA Corporation";
210 const bool is_amd = vendor == "ATI Technologies Inc."; 210 const bool is_amd = vendor_name == "ATI Technologies Inc.";
211 const bool is_intel = vendor == "Intel"; 211 const bool is_intel = vendor_name == "Intel";
212 212
213#ifdef __unix__ 213#ifdef __unix__
214 const bool is_linux = true; 214 const bool is_linux = true;
@@ -275,6 +275,56 @@ Device::Device() {
275 } 275 }
276} 276}
277 277
278std::string Device::GetVendorName() const {
279 if (vendor_name == "NVIDIA Corporation") {
280 return "NVIDIA";
281 }
282 if (vendor_name == "ATI Technologies Inc.") {
283 return "AMD";
284 }
285 if (vendor_name == "Intel") {
286 // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris.
287 // Simply return `INTEL` for those as well as the Windows driver.
288 return "INTEL";
289 }
290 if (vendor_name == "Intel Open Source Technology Center") {
291 return "I965";
292 }
293 if (vendor_name == "Mesa Project") {
294 return "I915";
295 }
296 if (vendor_name == "Mesa/X.org") {
297 // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return
298 // MESA instead of one of those driver names.
299 return "MESA";
300 }
301 if (vendor_name == "AMD") {
302 return "RADEONSI";
303 }
304 if (vendor_name == "nouveau") {
305 return "NOUVEAU";
306 }
307 if (vendor_name == "X.Org") {
308 return "R600";
309 }
310 if (vendor_name == "Collabora Ltd") {
311 return "ZINK";
312 }
313 if (vendor_name == "Intel Corporation") {
314 return "OPENSWR";
315 }
316 if (vendor_name == "Microsoft Corporation") {
317 return "D3D12";
318 }
319 if (vendor_name == "NVIDIA") {
320 // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default
321 // strategy would have returned `NVIDIA` here for this driver, the same result as the
322 // proprietary driver.
323 return "TEGRA";
324 }
325 return vendor_name;
326}
327
278Device::Device(std::nullptr_t) { 328Device::Device(std::nullptr_t) {
279 max_uniform_buffers.fill(std::numeric_limits<u32>::max()); 329 max_uniform_buffers.fill(std::numeric_limits<u32>::max());
280 uniform_buffer_alignment = 4; 330 uniform_buffer_alignment = 4;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index f24bd0c7b..2c2b13767 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -22,6 +22,8 @@ public:
22 explicit Device(); 22 explicit Device();
23 explicit Device(std::nullptr_t); 23 explicit Device(std::nullptr_t);
24 24
25 [[nodiscard]] std::string GetVendorName() const;
26
25 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { 27 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
26 return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; 28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
27 } 29 }
@@ -130,6 +132,7 @@ private:
130 static bool TestVariableAoffi(); 132 static bool TestVariableAoffi();
131 static bool TestPreciseBug(); 133 static bool TestPreciseBug();
132 134
135 std::string vendor_name;
133 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; 136 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
134 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; 137 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
135 size_t uniform_buffer_alignment{}; 138 size_t uniform_buffer_alignment{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
611 shader_cache.OnCPUWrite(addr, size); 611 shader_cache.OnCPUWrite(addr, size);
612} 612}
613 613
614void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
615 {
616 std::scoped_lock lock{texture_cache.mutex};
617 texture_cache.UnmapGPUMemory(addr, size);
618 }
619}
620
614void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { 621void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
615 if (!gpu.IsAsync()) { 622 if (!gpu.IsAsync()) {
616 gpu_memory.Write<u32>(addr, value); 623 gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
80 void OnCPUWrite(VAddr addr, u64 size) override; 80 void OnCPUWrite(VAddr addr, u64 size) override;
81 void SyncGuestHost() override; 81 void SyncGuestHost() override;
82 void UnmapMemory(VAddr addr, u64 size) override; 82 void UnmapMemory(VAddr addr, u64 size) override;
83 void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
83 void SignalSemaphore(GPUVAddr addr, u32 value) override; 84 void SignalSemaphore(GPUVAddr addr, u32 value) override;
84 void SignalSyncPoint(u32 value) override; 85 void SignalSyncPoint(u32 value) override;
85 void ReleaseFences() override; 86 void ReleaseFences() override;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 23948feed..ff0f03e99 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -327,7 +327,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
327 if (format_info.is_compressed) { 327 if (format_info.is_compressed) {
328 return false; 328 return false;
329 } 329 }
330 if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { 330 if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) ==
331 ACCELERATED_FORMATS.end()) {
331 return false; 332 return false;
332 } 333 }
333 if (format_info.compatibility_by_size) { 334 if (format_info.compatibility_by_size) {
@@ -341,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
341[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, 342[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
342 VideoCommon::SubresourceLayers subresource, GLenum target) { 343 VideoCommon::SubresourceLayers subresource, GLenum target) {
343 switch (target) { 344 switch (target) {
345 case GL_TEXTURE_1D:
346 return CopyOrigin{
347 .level = static_cast<GLint>(subresource.base_level),
348 .x = static_cast<GLint>(offset.x),
349 .y = static_cast<GLint>(0),
350 .z = static_cast<GLint>(0),
351 };
352 case GL_TEXTURE_1D_ARRAY:
353 return CopyOrigin{
354 .level = static_cast<GLint>(subresource.base_level),
355 .x = static_cast<GLint>(offset.x),
356 .y = static_cast<GLint>(0),
357 .z = static_cast<GLint>(subresource.base_layer),
358 };
344 case GL_TEXTURE_2D_ARRAY: 359 case GL_TEXTURE_2D_ARRAY:
345 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: 360 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
346 return CopyOrigin{ 361 return CopyOrigin{
@@ -366,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
366 VideoCommon::SubresourceLayers dst_subresource, 381 VideoCommon::SubresourceLayers dst_subresource,
367 GLenum target) { 382 GLenum target) {
368 switch (target) { 383 switch (target) {
384 case GL_TEXTURE_1D:
385 return CopyRegion{
386 .width = static_cast<GLsizei>(extent.width),
387 .height = static_cast<GLsizei>(1),
388 .depth = static_cast<GLsizei>(1),
389 };
390 case GL_TEXTURE_1D_ARRAY:
391 return CopyRegion{
392 .width = static_cast<GLsizei>(extent.width),
393 .height = static_cast<GLsizei>(1),
394 .depth = static_cast<GLsizei>(dst_subresource.num_layers),
395 };
369 case GL_TEXTURE_2D_ARRAY: 396 case GL_TEXTURE_2D_ARRAY:
370 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: 397 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
371 return CopyRegion{ 398 return CopyRegion{
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index cc19a110f..0b66f8332 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -70,6 +70,10 @@ public:
70 return &rasterizer; 70 return &rasterizer;
71 } 71 }
72 72
73 [[nodiscard]] std::string GetDeviceVendor() const override {
74 return device.GetVendorName();
75 }
76
73private: 77private:
74 /// Initializes the OpenGL state and creates persistent objects. 78 /// Initializes the OpenGL state and creates persistent objects.
75 void InitOpenGLObjects(); 79 void InitOpenGLObjects();
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index abaf1ee6a..8fb5be393 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
261 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); 261 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
262 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); 262 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
263 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), 263 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
264 copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); 264 copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
265 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), 265 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
266 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); 266 copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
267 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); 267 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
268 } 268 }
269 program_manager.RestoreGuestCompute(); 269 program_manager.RestoreGuestCompute();
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 72071316c..d7d17e110 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -47,6 +47,10 @@ public:
47 return &rasterizer; 47 return &rasterizer;
48 } 48 }
49 49
50 [[nodiscard]] std::string GetDeviceVendor() const override {
51 return device.GetDriverName();
52 }
53
50private: 54private:
51 void Report() const; 55 void Report() const;
52 56
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 8cb65e588..0df4e1a1c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) {
/// Builds the six indices (two triangles: 0-1-2 and 0-2-3) for quad number `quad`, offset by
/// `first`. The arithmetic is done in u32 and then converted to the index type T.
template <typename T>
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
    // Index of the quad's first vertex; each quad owns four consecutive vertices.
    const u32 base = first + quad * 4;
    return {
        static_cast<T>(base + 0), static_cast<T>(base + 1), static_cast<T>(base + 2),
        static_cast<T>(base + 0), static_cast<T>(base + 2), static_cast<T>(base + 3),
    };
}
62} // Anonymous namespace 63} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
557 pipeline_cache.OnCPUWrite(addr, size); 557 pipeline_cache.OnCPUWrite(addr, size);
558} 558}
559 559
560void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
561 {
562 std::scoped_lock lock{texture_cache.mutex};
563 texture_cache.UnmapGPUMemory(addr, size);
564 }
565}
566
560void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 567void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
561 if (!gpu.IsAsync()) { 568 if (!gpu.IsAsync()) {
562 gpu_memory.Write<u32>(addr, value); 569 gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
72 void OnCPUWrite(VAddr addr, u64 size) override; 72 void OnCPUWrite(VAddr addr, u64 size) override;
73 void SyncGuestHost() override; 73 void SyncGuestHost() override;
74 void UnmapMemory(VAddr addr, u64 size) override; 74 void UnmapMemory(VAddr addr, u64 size) override;
75 void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
75 void SignalSemaphore(GPUVAddr addr, u32 value) override; 76 void SignalSemaphore(GPUVAddr addr, u32 value) override;
76 void SignalSyncPoint(u32 value) override; 77 void SignalSyncPoint(u32 value) override;
77 void ReleaseFences() override; 78 void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index ad69d32d1..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
69 } 69 }
70} 70}
71 71
72ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
73 : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
74
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { 75std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) { 76 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base 77 // Subresource address can't be lower than the base
@@ -82,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const
82 if (info.type != ImageType::e3D) { 85 if (info.type != ImageType::e3D) {
83 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); 86 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
84 const auto end = mip_level_offsets.begin() + info.resources.levels; 87 const auto end = mip_level_offsets.begin() + info.resources.levels;
85 const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); 88 const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset));
86 if (layer > info.resources.layers || it == end) { 89 if (layer > info.resources.layers || it == end) {
87 return std::nullopt; 90 return std::nullopt;
88 } 91 }
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 {
25 Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted 25 Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
26 Registered = 1 << 6, ///< True when the image is registered 26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked 27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28 Remapped = 1 << 8, ///< Image has been remapped.
29 Sparse = 1 << 9, ///< Image has non continous submemory.
28 30
29 // Garbage Collection Flags 31 // Garbage Collection Flags
30 BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher 32 BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
31 ///< garbage collection priority 33 ///< garbage collection priority
32 Alias = 1 << 9, ///< This image has aliases and has priority on garbage 34 Alias = 1 << 11, ///< This image has aliases and has priority on garbage
33 ///< collection 35 ///< collection
34}; 36};
35DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) 37DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
36 38
@@ -57,6 +59,12 @@ struct ImageBase {
57 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; 59 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
58 } 60 }
59 61
62 [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
63 const VAddr overlap_end = overlap_gpu_addr + overlap_size;
64 const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
65 return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
66 }
67
60 void CheckBadOverlapState(); 68 void CheckBadOverlapState();
61 void CheckAliasState(); 69 void CheckAliasState();
62 70
@@ -84,6 +92,29 @@ struct ImageBase {
84 92
85 std::vector<AliasedImage> aliased_images; 93 std::vector<AliasedImage> aliased_images;
86 std::vector<ImageId> overlapping_images; 94 std::vector<ImageId> overlapping_images;
95 ImageMapId map_view_id{};
96};
97
98struct ImageMapView {
99 explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
100
101 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
102 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
103 const VAddr cpu_addr_end = cpu_addr + size;
104 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
105 }
106
107 [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
108 const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
109 const GPUVAddr gpu_addr_end = gpu_addr + size;
110 return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
111 }
112
113 GPUVAddr gpu_addr;
114 VAddr cpu_addr;
115 size_t size;
116 ImageId image_id;
117 bool picked{};
87}; 118};
88 119
89struct ImageAllocBase { 120struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 84530a179..01de2d498 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
13#include <span> 13#include <span>
14#include <type_traits> 14#include <type_traits>
15#include <unordered_map> 15#include <unordered_map>
16#include <unordered_set>
16#include <utility> 17#include <utility>
17#include <vector> 18#include <vector>
18 19
@@ -110,9 +111,6 @@ public:
110 /// Notify the cache that a new frame has been queued 111 /// Notify the cache that a new frame has been queued
111 void TickFrame(); 112 void TickFrame();
112 113
113 /// Runs the Garbage Collector.
114 void RunGarbageCollector();
115
116 /// Return a constant reference to the given image view id 114 /// Return a constant reference to the given image view id
117 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; 115 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
118 116
@@ -155,12 +153,13 @@ public:
155 /// Remove images in a region 153 /// Remove images in a region
156 void UnmapMemory(VAddr cpu_addr, size_t size); 154 void UnmapMemory(VAddr cpu_addr, size_t size);
157 155
156 /// Flag images overlapping a GPU virtual address region as remapped and stop tracking them
157 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
158
158 /// Blit an image with the given parameters 159 /// Blit an image with the given parameters
159 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 160 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
160 const Tegra::Engines::Fermi2D::Surface& src, 161 const Tegra::Engines::Fermi2D::Surface& src,
161 const Tegra::Engines::Fermi2D::Config& copy, 162 const Tegra::Engines::Fermi2D::Config& copy);
162 std::optional<Region2D> src_region_override = {},
163 std::optional<Region2D> dst_region_override = {});
164 163
165 /// Invalidate the contents of the color buffer index 164 /// Invalidate the contents of the color buffer index
166 /// These contents become unspecified, the cache can assume aggressive optimizations. 165 /// These contents become unspecified, the cache can assume aggressive optimizations.
@@ -193,7 +192,22 @@ public:
193private: 192private:
194 /// Iterate over all page indices in a range 193 /// Iterate over all page indices in a range
195 template <typename Func> 194 template <typename Func>
196 static void ForEachPage(VAddr addr, size_t size, Func&& func) { 195 static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
196 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
197 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
198 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
199 if constexpr (RETURNS_BOOL) {
200 if (func(page)) {
201 break;
202 }
203 } else {
204 func(page);
205 }
206 }
207 }
208
209 template <typename Func>
210 static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
197 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; 211 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
198 const u64 page_end = (addr + size - 1) >> PAGE_BITS; 212 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
199 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { 213 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -207,6 +221,9 @@ private:
207 } 221 }
208 } 222 }
209 223
224 /// Runs the Garbage Collector.
225 void RunGarbageCollector();
226
210 /// Fills image_view_ids in the image views in indices 227 /// Fills image_view_ids in the image views in indices
211 void FillImageViews(DescriptorTable<TICEntry>& table, 228 void FillImageViews(DescriptorTable<TICEntry>& table,
212 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, 229 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
@@ -220,7 +237,7 @@ private:
220 FramebufferId GetFramebufferId(const RenderTargets& key); 237 FramebufferId GetFramebufferId(const RenderTargets& key);
221 238
222 /// Refresh the contents (pixel data) of an image 239 /// Refresh the contents (pixel data) of an image
223 void RefreshContents(Image& image); 240 void RefreshContents(Image& image, ImageId image_id);
224 241
225 /// Upload data from guest to an image 242 /// Upload data from guest to an image
226 template <typename StagingBuffer> 243 template <typename StagingBuffer>
@@ -269,6 +286,16 @@ private:
269 template <typename Func> 286 template <typename Func>
270 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); 287 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
271 288
289 template <typename Func>
290 void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
291
292 template <typename Func>
293 void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
294
295 /// Iterates over all the sub-mapped GPU segments of an image calling func
296 template <typename Func>
297 void ForEachSparseSegment(ImageBase& image, Func&& func);
298
272 /// Find or create an image view in the given image with the passed parameters 299 /// Find or create an image view in the given image with the passed parameters
273 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); 300 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
274 301
@@ -279,10 +306,10 @@ private:
279 void UnregisterImage(ImageId image); 306 void UnregisterImage(ImageId image);
280 307
281 /// Track CPU reads and writes for image 308 /// Track CPU reads and writes for image
282 void TrackImage(ImageBase& image); 309 void TrackImage(ImageBase& image, ImageId image_id);
283 310
284 /// Stop tracking CPU reads and writes for image 311 /// Stop tracking CPU reads and writes for image
285 void UntrackImage(ImageBase& image); 312 void UntrackImage(ImageBase& image, ImageId image_id);
286 313
287 /// Delete image from the cache 314 /// Delete image from the cache
288 void DeleteImage(ImageId image); 315 void DeleteImage(ImageId image);
@@ -340,7 +367,13 @@ private:
340 std::unordered_map<TSCEntry, SamplerId> samplers; 367 std::unordered_map<TSCEntry, SamplerId> samplers;
341 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 368 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
342 369
343 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; 370 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
371 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
372 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
373
374 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
375
376 VAddr virtual_invalid_space{};
344 377
345 bool has_deleted_images = false; 378 bool has_deleted_images = false;
346 u64 total_used_memory = 0; 379 u64 total_used_memory = 0;
@@ -349,6 +382,7 @@ private:
349 u64 critical_memory; 382 u64 critical_memory;
350 383
351 SlotVector<Image> slot_images; 384 SlotVector<Image> slot_images;
385 SlotVector<ImageMapView> slot_map_views;
352 SlotVector<ImageView> slot_image_views; 386 SlotVector<ImageView> slot_image_views;
353 SlotVector<ImageAlloc> slot_image_allocs; 387 SlotVector<ImageAlloc> slot_image_allocs;
354 SlotVector<Sampler> slot_samplers; 388 SlotVector<Sampler> slot_samplers;
@@ -459,7 +493,7 @@ void TextureCache<P>::RunGarbageCollector() {
459 } 493 }
460 } 494 }
461 if (True(image->flags & ImageFlagBits::Tracked)) { 495 if (True(image->flags & ImageFlagBits::Tracked)) {
462 UntrackImage(*image); 496 UntrackImage(*image, image_id);
463 } 497 }
464 UnregisterImage(image_id); 498 UnregisterImage(image_id);
465 DeleteImage(image_id); 499 DeleteImage(image_id);
@@ -658,7 +692,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
658 return; 692 return;
659 } 693 }
660 image.flags |= ImageFlagBits::CpuModified; 694 image.flags |= ImageFlagBits::CpuModified;
661 UntrackImage(image); 695 if (True(image.flags & ImageFlagBits::Tracked)) {
696 UntrackImage(image, image_id);
697 }
662 }); 698 });
663} 699}
664 700
@@ -695,7 +731,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
695 for (const ImageId id : deleted_images) { 731 for (const ImageId id : deleted_images) {
696 Image& image = slot_images[id]; 732 Image& image = slot_images[id];
697 if (True(image.flags & ImageFlagBits::Tracked)) { 733 if (True(image.flags & ImageFlagBits::Tracked)) {
698 UntrackImage(image); 734 UntrackImage(image, id);
699 } 735 }
700 UnregisterImage(id); 736 UnregisterImage(id);
701 DeleteImage(id); 737 DeleteImage(id);
@@ -703,11 +739,26 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
703} 739}
704 740
705template <class P> 741template <class P>
742void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
743 std::vector<ImageId> deleted_images;
744 ForEachImageInRegionGPU(gpu_addr, size,
745 [&](ImageId id, Image&) { deleted_images.push_back(id); });
746 for (const ImageId id : deleted_images) {
747 Image& image = slot_images[id];
748 if (True(image.flags & ImageFlagBits::Remapped)) {
749 continue;
750 }
751 image.flags |= ImageFlagBits::Remapped;
752 if (True(image.flags & ImageFlagBits::Tracked)) {
753 UntrackImage(image, id);
754 }
755 }
756}
757
758template <class P>
706void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 759void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
707 const Tegra::Engines::Fermi2D::Surface& src, 760 const Tegra::Engines::Fermi2D::Surface& src,
708 const Tegra::Engines::Fermi2D::Config& copy, 761 const Tegra::Engines::Fermi2D::Config& copy) {
709 std::optional<Region2D> src_override,
710 std::optional<Region2D> dst_override) {
711 const BlitImages images = GetBlitImages(dst, src); 762 const BlitImages images = GetBlitImages(dst, src);
712 const ImageId dst_id = images.dst_id; 763 const ImageId dst_id = images.dst_id;
713 const ImageId src_id = images.src_id; 764 const ImageId src_id = images.src_id;
@@ -718,47 +769,25 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
718 const ImageBase& src_image = slot_images[src_id]; 769 const ImageBase& src_image = slot_images[src_id];
719 770
720 // TODO: Deduplicate 771 // TODO: Deduplicate
721 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
722 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
723 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
724 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
725 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
726
727 // out of bounds texture blit checking
728 const bool use_override = src_override.has_value();
729 const s32 src_x0 = copy.src_x0 >> src_samples_x;
730 s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
731 const s32 src_y0 = copy.src_y0 >> src_samples_y;
732 const s32 src_y1 = copy.src_y1 >> src_samples_y;
733
734 const auto src_width = static_cast<s32>(src_image.info.size.width);
735 const bool width_oob = src_x1 > src_width;
736 const auto width_diff = width_oob ? src_x1 - src_width : 0;
737 if (width_oob) {
738 src_x1 = src_width;
739 }
740
741 const Region2D src_dimensions{
742 Offset2D{.x = src_x0, .y = src_y0},
743 Offset2D{.x = src_x1, .y = src_y1},
744 };
745 const auto src_region = use_override ? *src_override : src_dimensions;
746
747 const std::optional src_base = src_image.TryFindBase(src.Address()); 772 const std::optional src_base = src_image.TryFindBase(src.Address());
748 const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; 773 const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
749 const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); 774 const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
750 const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); 775 const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
751 const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); 776 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
777 const Region2D src_region{
778 Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
779 Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
780 };
752 781
753 const s32 dst_x0 = copy.dst_x0 >> dst_samples_x; 782 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
754 const s32 dst_x1 = copy.dst_x1 >> dst_samples_x; 783 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
755 const s32 dst_y0 = copy.dst_y0 >> dst_samples_y; 784 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
756 const s32 dst_y1 = copy.dst_y1 >> dst_samples_y; 785 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
757 const Region2D dst_dimensions{ 786 const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
758 Offset2D{.x = dst_x0, .y = dst_y0}, 787 const Region2D dst_region{
759 Offset2D{.x = dst_x1 - width_diff, .y = dst_y1}, 788 Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
789 Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
760 }; 790 };
761 const auto dst_region = use_override ? *dst_override : dst_dimensions;
762 791
763 // Always call this after src_framebuffer_id was queried, as the address might be invalidated. 792 // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
764 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; 793 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@@ -775,21 +804,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
775 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, 804 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
776 copy.operation); 805 copy.operation);
777 } 806 }
778
779 if (width_oob) {
780 // Continue copy of the oob region of the texture on the next row
781 auto oob_src = src;
782 oob_src.height++;
783 const Region2D src_region_override{
784 Offset2D{.x = 0, .y = src_y0 + 1},
785 Offset2D{.x = width_diff, .y = src_y1 + 1},
786 };
787 const Region2D dst_region_override{
788 Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
789 Offset2D{.x = dst_x1, .y = dst_y1},
790 };
791 BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
792 }
793} 807}
794 808
795template <class P> 809template <class P>
@@ -833,9 +847,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
833 if (it == page_table.end()) { 847 if (it == page_table.end()) {
834 return nullptr; 848 return nullptr;
835 } 849 }
836 const auto& image_ids = it->second; 850 const auto& image_map_ids = it->second;
837 for (const ImageId image_id : image_ids) { 851 for (const ImageMapId map_id : image_map_ids) {
838 const ImageBase& image = slot_images[image_id]; 852 const ImageMapView& map = slot_map_views[map_id];
853 const ImageBase& image = slot_images[map.image_id];
839 if (image.cpu_addr != cpu_addr) { 854 if (image.cpu_addr != cpu_addr) {
840 continue; 855 continue;
841 } 856 }
@@ -915,13 +930,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
915} 930}
916 931
917template <class P> 932template <class P>
918void TextureCache<P>::RefreshContents(Image& image) { 933void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
919 if (False(image.flags & ImageFlagBits::CpuModified)) { 934 if (False(image.flags & ImageFlagBits::CpuModified)) {
920 // Only upload modified images 935 // Only upload modified images
921 return; 936 return;
922 } 937 }
923 image.flags &= ~ImageFlagBits::CpuModified; 938 image.flags &= ~ImageFlagBits::CpuModified;
924 TrackImage(image); 939 TrackImage(image, image_id);
925 940
926 if (image.info.num_samples > 1) { 941 if (image.info.num_samples > 1) {
927 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 942 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -958,7 +973,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
958 973
959template <class P> 974template <class P>
960ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { 975ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
961 if (!IsValidAddress(gpu_memory, config)) { 976 if (!IsValidEntry(gpu_memory, config)) {
962 return NULL_IMAGE_VIEW_ID; 977 return NULL_IMAGE_VIEW_ID;
963 } 978 }
964 const auto [pair, is_new] = image_views.try_emplace(config); 979 const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1000,14 +1015,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
1000template <class P> 1015template <class P>
1001ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, 1016ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1002 RelaxedOptions options) { 1017 RelaxedOptions options) {
1003 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 1018 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1004 if (!cpu_addr) { 1019 if (!cpu_addr) {
1005 return ImageId{}; 1020 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
1021 if (!cpu_addr) {
1022 return ImageId{};
1023 }
1006 } 1024 }
1007 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 1025 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1008 const bool native_bgr = runtime.HasNativeBgr(); 1026 const bool native_bgr = runtime.HasNativeBgr();
1009 ImageId image_id; 1027 ImageId image_id;
1010 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1028 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1029 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1030 return false;
1031 }
1011 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { 1032 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
1012 const bool strict_size = False(options & RelaxedOptions::Size) && 1033 const bool strict_size = False(options & RelaxedOptions::Size) &&
1013 True(existing_image.flags & ImageFlagBits::Strong); 1034 True(existing_image.flags & ImageFlagBits::Strong);
@@ -1033,7 +1054,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1033template <class P> 1054template <class P>
1034ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, 1055ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
1035 RelaxedOptions options) { 1056 RelaxedOptions options) {
1036 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 1057 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1058 if (!cpu_addr) {
1059 const auto size = CalculateGuestSizeInBytes(info);
1060 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
1061 if (!cpu_addr) {
1062 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
1063 virtual_invalid_space += Common::AlignUp(size, 32);
1064 cpu_addr = std::optional<VAddr>(fake_addr);
1065 }
1066 }
1037 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); 1067 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
1038 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); 1068 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
1039 const Image& image = slot_images[image_id]; 1069 const Image& image = slot_images[image_id];
@@ -1053,11 +1083,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1053 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 1083 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1054 const bool native_bgr = runtime.HasNativeBgr(); 1084 const bool native_bgr = runtime.HasNativeBgr();
1055 std::vector<ImageId> overlap_ids; 1085 std::vector<ImageId> overlap_ids;
1086 std::unordered_set<ImageId> overlaps_found;
1056 std::vector<ImageId> left_aliased_ids; 1087 std::vector<ImageId> left_aliased_ids;
1057 std::vector<ImageId> right_aliased_ids; 1088 std::vector<ImageId> right_aliased_ids;
1089 std::unordered_set<ImageId> ignore_textures;
1058 std::vector<ImageId> bad_overlap_ids; 1090 std::vector<ImageId> bad_overlap_ids;
1059 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { 1091 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
1060 if (info.type != overlap.info.type) { 1092 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1093 ignore_textures.insert(overlap_id);
1061 return; 1094 return;
1062 } 1095 }
1063 if (info.type == ImageType::Linear) { 1096 if (info.type == ImageType::Linear) {
@@ -1067,6 +1100,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1067 } 1100 }
1068 return; 1101 return;
1069 } 1102 }
1103 overlaps_found.insert(overlap_id);
1070 static constexpr bool strict_size = true; 1104 static constexpr bool strict_size = true;
1071 const std::optional<OverlapResult> solution = ResolveOverlap( 1105 const std::optional<OverlapResult> solution = ResolveOverlap(
1072 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); 1106 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1090,12 +1124,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1090 bad_overlap_ids.push_back(overlap_id); 1124 bad_overlap_ids.push_back(overlap_id);
1091 overlap.flags |= ImageFlagBits::BadOverlap; 1125 overlap.flags |= ImageFlagBits::BadOverlap;
1092 } 1126 }
1093 }); 1127 };
1128 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
1129 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
1130 if (!overlaps_found.contains(overlap_id)) {
1131 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1132 ignore_textures.insert(overlap_id);
1133 }
1134 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
1135 ignore_textures.insert(overlap_id);
1136 }
1137 }
1138 };
1139 ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
1094 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); 1140 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
1095 Image& new_image = slot_images[new_image_id]; 1141 Image& new_image = slot_images[new_image_id];
1096 1142
1143 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
1144 new_image.flags |= ImageFlagBits::Sparse;
1145 }
1146
1147 for (const ImageId overlap_id : ignore_textures) {
1148 Image& overlap = slot_images[overlap_id];
1149 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1150 UNIMPLEMENTED();
1151 }
1152 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1153 UntrackImage(overlap, overlap_id);
1154 }
1155 UnregisterImage(overlap_id);
1156 DeleteImage(overlap_id);
1157 }
1158
1097 // TODO: Only upload what we need 1159 // TODO: Only upload what we need
1098 RefreshContents(new_image); 1160 RefreshContents(new_image, new_image_id);
1099 1161
1100 for (const ImageId overlap_id : overlap_ids) { 1162 for (const ImageId overlap_id : overlap_ids) {
1101 Image& overlap = slot_images[overlap_id]; 1163 Image& overlap = slot_images[overlap_id];
@@ -1107,7 +1169,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1107 runtime.CopyImage(new_image, overlap, copies); 1169 runtime.CopyImage(new_image, overlap, copies);
1108 } 1170 }
1109 if (True(overlap.flags & ImageFlagBits::Tracked)) { 1171 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1110 UntrackImage(overlap); 1172 UntrackImage(overlap, overlap_id);
1111 } 1173 }
1112 UnregisterImage(overlap_id); 1174 UnregisterImage(overlap_id);
1113 DeleteImage(overlap_id); 1175 DeleteImage(overlap_id);
@@ -1242,7 +1304,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1242 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; 1304 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1243 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; 1305 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1244 boost::container::small_vector<ImageId, 32> images; 1306 boost::container::small_vector<ImageId, 32> images;
1245 ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { 1307 boost::container::small_vector<ImageMapId, 32> maps;
1308 ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
1246 const auto it = page_table.find(page); 1309 const auto it = page_table.find(page);
1247 if (it == page_table.end()) { 1310 if (it == page_table.end()) {
1248 if constexpr (BOOL_BREAK) { 1311 if constexpr (BOOL_BREAK) {
@@ -1251,12 +1314,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1251 return; 1314 return;
1252 } 1315 }
1253 } 1316 }
1317 for (const ImageMapId map_id : it->second) {
1318 ImageMapView& map = slot_map_views[map_id];
1319 if (map.picked) {
1320 continue;
1321 }
1322 if (!map.Overlaps(cpu_addr, size)) {
1323 continue;
1324 }
1325 map.picked = true;
1326 maps.push_back(map_id);
1327 Image& image = slot_images[map.image_id];
1328 if (True(image.flags & ImageFlagBits::Picked)) {
1329 continue;
1330 }
1331 image.flags |= ImageFlagBits::Picked;
1332 images.push_back(map.image_id);
1333 if constexpr (BOOL_BREAK) {
1334 if (func(map.image_id, image)) {
1335 return true;
1336 }
1337 } else {
1338 func(map.image_id, image);
1339 }
1340 }
1341 if constexpr (BOOL_BREAK) {
1342 return false;
1343 }
1344 });
1345 for (const ImageId image_id : images) {
1346 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1347 }
1348 for (const ImageMapId map_id : maps) {
1349 slot_map_views[map_id].picked = false;
1350 }
1351}
1352
1353template <class P>
1354template <typename Func>
1355void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
1356 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1357 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1358 boost::container::small_vector<ImageId, 8> images;
1359 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
1360 const auto it = gpu_page_table.find(page);
1361 if (it == gpu_page_table.end()) {
1362 if constexpr (BOOL_BREAK) {
1363 return false;
1364 } else {
1365 return;
1366 }
1367 }
1368 for (const ImageId image_id : it->second) {
1369 Image& image = slot_images[image_id];
1370 if (True(image.flags & ImageFlagBits::Picked)) {
1371 continue;
1372 }
1373 if (!image.OverlapsGPU(gpu_addr, size)) {
1374 continue;
1375 }
1376 image.flags |= ImageFlagBits::Picked;
1377 images.push_back(image_id);
1378 if constexpr (BOOL_BREAK) {
1379 if (func(image_id, image)) {
1380 return true;
1381 }
1382 } else {
1383 func(image_id, image);
1384 }
1385 }
1386 if constexpr (BOOL_BREAK) {
1387 return false;
1388 }
1389 });
1390 for (const ImageId image_id : images) {
1391 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1392 }
1393}
1394
1395template <class P>
1396template <typename Func>
1397void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
1398 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1399 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1400 boost::container::small_vector<ImageId, 8> images;
1401 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
1402 const auto it = sparse_page_table.find(page);
1403 if (it == sparse_page_table.end()) {
1404 if constexpr (BOOL_BREAK) {
1405 return false;
1406 } else {
1407 return;
1408 }
1409 }
1254 for (const ImageId image_id : it->second) { 1410 for (const ImageId image_id : it->second) {
1255 Image& image = slot_images[image_id]; 1411 Image& image = slot_images[image_id];
1256 if (True(image.flags & ImageFlagBits::Picked)) { 1412 if (True(image.flags & ImageFlagBits::Picked)) {
1257 continue; 1413 continue;
1258 } 1414 }
1259 if (!image.Overlaps(cpu_addr, size)) { 1415 if (!image.OverlapsGPU(gpu_addr, size)) {
1260 continue; 1416 continue;
1261 } 1417 }
1262 image.flags |= ImageFlagBits::Picked; 1418 image.flags |= ImageFlagBits::Picked;
@@ -1279,6 +1435,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1279} 1435}
1280 1436
1281template <class P> 1437template <class P>
1438template <typename Func>
1439void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1440 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1441 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1442 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1443 for (auto& segment : segments) {
1444 const auto gpu_addr = segment.first;
1445 const auto size = segment.second;
1446 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1447 ASSERT(cpu_addr);
1448 if constexpr (RETURNS_BOOL) {
1449 if (func(gpu_addr, *cpu_addr, size)) {
1450 break;
1451 }
1452 } else {
1453 func(gpu_addr, *cpu_addr, size);
1454 }
1455 }
1456}
1457
1458template <class P>
1282ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { 1459ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1283 Image& image = slot_images[image_id]; 1460 Image& image = slot_images[image_id];
1284 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { 1461 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1295,8 +1472,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1295 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), 1472 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1296 "Trying to register an already registered image"); 1473 "Trying to register an already registered image");
1297 image.flags |= ImageFlagBits::Registered; 1474 image.flags |= ImageFlagBits::Registered;
1298 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1299 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1300 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); 1475 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1301 if ((IsPixelFormatASTC(image.info.format) && 1476 if ((IsPixelFormatASTC(image.info.format) &&
1302 True(image.flags & ImageFlagBits::AcceleratedUpload)) || 1477 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1304,6 +1479,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1304 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1479 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1305 } 1480 }
1306 total_used_memory += Common::AlignUp(tentative_size, 1024); 1481 total_used_memory += Common::AlignUp(tentative_size, 1024);
1482 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1483 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1484 if (False(image.flags & ImageFlagBits::Sparse)) {
1485 auto map_id =
1486 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
1487 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
1488 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1489 image.map_view_id = map_id;
1490 return;
1491 }
1492 std::vector<ImageViewId> sparse_maps{};
1493 ForEachSparseSegment(
1494 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1495 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1496 ForEachCPUPage(cpu_addr, size,
1497 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1498 sparse_maps.push_back(map_id);
1499 });
1500 sparse_views.emplace(image_id, std::move(sparse_maps));
1501 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1502 [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
1307} 1503}
1308 1504
1309template <class P> 1505template <class P>
@@ -1320,34 +1516,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1320 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1516 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1321 } 1517 }
1322 total_used_memory -= Common::AlignUp(tentative_size, 1024); 1518 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1323 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { 1519 const auto& clear_page_table =
1324 const auto page_it = page_table.find(page); 1520 [this, image_id](
1325 if (page_it == page_table.end()) { 1521 u64 page,
1326 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); 1522 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
1327 return; 1523 const auto page_it = selected_page_table.find(page);
1328 } 1524 if (page_it == selected_page_table.end()) {
1329 std::vector<ImageId>& image_ids = page_it->second; 1525 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1330 const auto vector_it = std::ranges::find(image_ids, image_id); 1526 return;
1331 if (vector_it == image_ids.end()) { 1527 }
1332 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); 1528 std::vector<ImageId>& image_ids = page_it->second;
1333 return; 1529 const auto vector_it = std::ranges::find(image_ids, image_id);
1334 } 1530 if (vector_it == image_ids.end()) {
1335 image_ids.erase(vector_it); 1531 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1532 page << PAGE_BITS);
1533 return;
1534 }
1535 image_ids.erase(vector_it);
1536 };
1537 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1538 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
1539 if (False(image.flags & ImageFlagBits::Sparse)) {
1540 const auto map_id = image.map_view_id;
1541 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
1542 const auto page_it = page_table.find(page);
1543 if (page_it == page_table.end()) {
1544 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1545 return;
1546 }
1547 std::vector<ImageMapId>& image_map_ids = page_it->second;
1548 const auto vector_it = std::ranges::find(image_map_ids, map_id);
1549 if (vector_it == image_map_ids.end()) {
1550 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1551 page << PAGE_BITS);
1552 return;
1553 }
1554 image_map_ids.erase(vector_it);
1555 });
1556 slot_map_views.erase(map_id);
1557 return;
1558 }
1559 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1560 clear_page_table(page, sparse_page_table);
1336 }); 1561 });
1562 auto it = sparse_views.find(image_id);
1563 ASSERT(it != sparse_views.end());
1564 auto& sparse_maps = it->second;
1565 for (auto& map_view_id : sparse_maps) {
1566 const auto& map_range = slot_map_views[map_view_id];
1567 const VAddr cpu_addr = map_range.cpu_addr;
1568 const std::size_t size = map_range.size;
1569 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
1570 const auto page_it = page_table.find(page);
1571 if (page_it == page_table.end()) {
1572 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1573 return;
1574 }
1575 std::vector<ImageMapId>& image_map_ids = page_it->second;
1576 auto vector_it = image_map_ids.begin();
1577 while (vector_it != image_map_ids.end()) {
1578 ImageMapView& map = slot_map_views[*vector_it];
1579 if (map.image_id != image_id) {
1580 vector_it++;
1581 continue;
1582 }
1583 if (!map.picked) {
1584 map.picked = true;
1585 }
1586 vector_it = image_map_ids.erase(vector_it);
1587 }
1588 });
1589 slot_map_views.erase(map_view_id);
1590 }
1591 sparse_views.erase(it);
1337} 1592}
1338 1593
1339template <class P> 1594template <class P>
1340void TextureCache<P>::TrackImage(ImageBase& image) { 1595void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
1341 ASSERT(False(image.flags & ImageFlagBits::Tracked)); 1596 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1342 image.flags |= ImageFlagBits::Tracked; 1597 image.flags |= ImageFlagBits::Tracked;
1343 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); 1598 if (False(image.flags & ImageFlagBits::Sparse)) {
1599 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1600 return;
1601 }
1602 if (True(image.flags & ImageFlagBits::Registered)) {
1603 auto it = sparse_views.find(image_id);
1604 ASSERT(it != sparse_views.end());
1605 auto& sparse_maps = it->second;
1606 for (auto& map_view_id : sparse_maps) {
1607 const auto& map = slot_map_views[map_view_id];
1608 const VAddr cpu_addr = map.cpu_addr;
1609 const std::size_t size = map.size;
1610 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1611 }
1612 return;
1613 }
1614 ForEachSparseSegment(image,
1615 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1616 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1617 });
1344} 1618}
1345 1619
1346template <class P> 1620template <class P>
1347void TextureCache<P>::UntrackImage(ImageBase& image) { 1621void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
1348 ASSERT(True(image.flags & ImageFlagBits::Tracked)); 1622 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1349 image.flags &= ~ImageFlagBits::Tracked; 1623 image.flags &= ~ImageFlagBits::Tracked;
1350 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); 1624 if (False(image.flags & ImageFlagBits::Sparse)) {
1625 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1626 return;
1627 }
1628 ASSERT(True(image.flags & ImageFlagBits::Registered));
1629 auto it = sparse_views.find(image_id);
1630 ASSERT(it != sparse_views.end());
1631 auto& sparse_maps = it->second;
1632 for (auto& map_view_id : sparse_maps) {
1633 const auto& map = slot_map_views[map_view_id];
1634 const VAddr cpu_addr = map.cpu_addr;
1635 const std::size_t size = map.size;
1636 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
1637 }
1351} 1638}
1352 1639
1353template <class P> 1640template <class P>
@@ -1489,10 +1776,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
1489 if (invalidate) { 1776 if (invalidate) {
1490 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); 1777 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1491 if (False(image.flags & ImageFlagBits::Tracked)) { 1778 if (False(image.flags & ImageFlagBits::Tracked)) {
1492 TrackImage(image); 1779 TrackImage(image, image_id);
1493 } 1780 }
1494 } else { 1781 } else {
1495 RefreshContents(image); 1782 RefreshContents(image, image_id);
1496 SynchronizeAliases(image_id); 1783 SynchronizeAliases(image_id);
1497 } 1784 }
1498 if (is_modification) { 1785 if (is_modification) {
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..9fbdc1ac6 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
16constexpr SlotId CORRUPT_ID{0xfffffffe}; 16constexpr SlotId CORRUPT_ID{0xfffffffe};
17 17
18using ImageId = SlotId; 18using ImageId = SlotId;
19using ImageMapId = SlotId;
19using ImageViewId = SlotId; 20using ImageViewId = SlotId;
20using ImageAllocId = SlotId; 21using ImageAllocId = SlotId;
21using SamplerId = SlotId; 22using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 4efe042b6..c872517b8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -394,7 +394,7 @@ template <u32 GOB_EXTENT>
394 const s32 mip_offset = diff % layer_stride; 394 const s32 mip_offset = diff % layer_stride;
395 const std::array offsets = CalculateMipLevelOffsets(new_info); 395 const std::array offsets = CalculateMipLevelOffsets(new_info);
396 const auto end = offsets.begin() + new_info.resources.levels; 396 const auto end = offsets.begin() + new_info.resources.levels;
397 const auto it = std::find(offsets.begin(), end, mip_offset); 397 const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
398 if (it == end) { 398 if (it == end) {
399 // Mipmap is not aligned to any valid size 399 // Mipmap is not aligned to any valid size
400 return std::nullopt; 400 return std::nullopt;
@@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
664 return offsets; 664 return offsets;
665} 665}
666 666
667LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
668 const u32 num_levels = info.resources.levels;
669 const LevelInfo level_info = MakeLevelInfo(info);
670 LevelArray sizes{};
671 for (u32 level = 0; level < num_levels; ++level) {
672 sizes[level] = CalculateLevelSize(level_info, level);
673 }
674 return sizes;
675}
676
667std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { 677std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
668 ASSERT(info.type == ImageType::e3D); 678 ASSERT(info.type == ImageType::e3D);
669 std::vector<u32> offsets; 679 std::vector<u32> offsets;
@@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
776 return copies; 786 return copies;
777} 787}
778 788
779bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { 789bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
780 if (config.Address() == 0) { 790 const GPUVAddr address = config.Address();
791 if (address == 0) {
781 return false; 792 return false;
782 } 793 }
783 if (config.Address() > (u64(1) << 48)) { 794 if (address > (1ULL << 48)) {
784 return false; 795 return false;
785 } 796 }
786 return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); 797 if (gpu_memory.GpuToCpuAddress(address).has_value()) {
798 return true;
799 }
800 const ImageInfo info{config};
801 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
802 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
787} 803}
788 804
789std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 805std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
40 40
41[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; 41[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
42 42
43[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
44
43[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); 45[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
44 46
45[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); 47[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,7 @@ struct OverlapResult {
55 const ImageInfo& src, 57 const ImageInfo& src,
56 SubresourceBase base); 58 SubresourceBase base);
57 59
58[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); 60[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
59 61
60[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, 62[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
61 GPUVAddr gpu_addr, const ImageInfo& info, 63 GPUVAddr gpu_addr, const ImageInfo& info,
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 7b756ba41..3ab500760 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1365,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1365 // each partition. 1365 // each partition.
1366 1366
1367 // Determine partitions, partition index, and color endpoint modes 1367 // Determine partitions, partition index, and color endpoint modes
1368 s32 planeIdx = -1; 1368 u32 planeIdx{UINT32_MAX};
1369 u32 partitionIndex; 1369 u32 partitionIndex{};
1370 u32 colorEndpointMode[4] = {0, 0, 0, 0}; 1370 u32 colorEndpointMode[4] = {0, 0, 0, 0};
1371 1371
1372 // Define color data. 1372 // Define color data.
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 23814afd2..f214510da 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
532 return (supported_usage & wanted_usage) == wanted_usage; 532 return (supported_usage & wanted_usage) == wanted_usage;
533} 533}
534 534
535std::string Device::GetDriverName() const {
536 switch (driver_id) {
537 case VK_DRIVER_ID_AMD_PROPRIETARY:
538 return "AMD";
539 case VK_DRIVER_ID_AMD_OPEN_SOURCE:
540 return "AMDVLK";
541 case VK_DRIVER_ID_MESA_RADV:
542 return "RADV";
543 case VK_DRIVER_ID_NVIDIA_PROPRIETARY:
544 return "NVIDIA";
545 case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS:
546 return "INTEL";
547 case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA:
548 return "ANV";
549 case VK_DRIVER_ID_MESA_LLVMPIPE:
550 return "LAVAPIPE";
551 default:
552 return vendor_name;
553 }
554}
555
535void Device::CheckSuitability(bool requires_swapchain) const { 556void Device::CheckSuitability(bool requires_swapchain) const {
536 std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; 557 std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
537 bool has_swapchain = false; 558 bool has_swapchain = false;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 88b298196..96c0f8c60 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -45,6 +45,9 @@ public:
45 /// Reports a shader to Nsight Aftermath. 45 /// Reports a shader to Nsight Aftermath.
46 void SaveShader(const std::vector<u32>& spirv) const; 46 void SaveShader(const std::vector<u32>& spirv) const;
47 47
48 /// Returns the name of the VkDriverId reported from Vulkan.
49 std::string GetDriverName() const;
50
48 /// Returns the dispatch loader with direct function pointers of the device. 51 /// Returns the dispatch loader with direct function pointers of the device.
49 const vk::DeviceDispatch& GetDispatchLoader() const { 52 const vk::DeviceDispatch& GetDispatchLoader() const {
50 return dld; 53 return dld;
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index efdc6aa50..7a6f84d96 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
143} 143}
144 144
145void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { 145void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
146 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); 146 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
147 ev->accept(); 147 ev->accept();
148} 148}
149 149
150void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { 150void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
151 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); 151 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
152 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 152 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
153 ev->accept(); 153 ev->accept();
154} 154}
155 155
156void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { 156void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
157 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); 157 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
158 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 158 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
159 ev->accept(); 159 ev->accept();
160} 160}
161 161
162void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { 162void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
163 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120); 163 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale,
164 ev->angleDelta().y() / 120);
164 ev->accept(); 165 ev->accept();
165} 166}
166 167
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 9c5aeb833..218b4782b 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -522,7 +522,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
522 QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); 522 QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
523 remove_menu->addSeparator(); 523 remove_menu->addSeparator();
524 QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); 524 QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
525 QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS")); 525 QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
526 QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
527 QAction* dump_romfs_sdmc = dump_romfs_menu->addAction(tr("Dump RomFS to SDMC"));
526 QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard")); 528 QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard"));
527 QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry")); 529 QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
528 context_menu.addSeparator(); 530 context_menu.addSeparator();
@@ -571,8 +573,12 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
571 connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { 573 connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() {
572 emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); 574 emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path);
573 }); 575 });
574 connect(dump_romfs, &QAction::triggered, 576 connect(dump_romfs, &QAction::triggered, [this, program_id, path]() {
575 [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); }); 577 emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal);
578 });
579 connect(dump_romfs_sdmc, &QAction::triggered, [this, program_id, path]() {
580 emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::SDMC);
581 });
576 connect(copy_tid, &QAction::triggered, 582 connect(copy_tid, &QAction::triggered,
577 [this, program_id]() { emit CopyTIDRequested(program_id); }); 583 [this, program_id]() { emit CopyTIDRequested(program_id); });
578 connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() { 584 connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() {
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index b630e34ff..50402da51 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -45,6 +45,11 @@ enum class GameListRemoveTarget {
45 CustomConfiguration, 45 CustomConfiguration,
46}; 46};
47 47
48enum class DumpRomFSTarget {
49 Normal,
50 SDMC,
51};
52
48enum class InstalledEntryType { 53enum class InstalledEntryType {
49 Game, 54 Game,
50 Update, 55 Update,
@@ -92,7 +97,7 @@ signals:
92 void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type); 97 void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type);
93 void RemoveFileRequested(u64 program_id, GameListRemoveTarget target, 98 void RemoveFileRequested(u64 program_id, GameListRemoveTarget target,
94 const std::string& game_path); 99 const std::string& game_path);
95 void DumpRomFSRequested(u64 program_id, const std::string& game_path); 100 void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
96 void CopyTIDRequested(u64 program_id); 101 void CopyTIDRequested(u64 program_id);
97 void NavigateToGamedbEntryRequested(u64 program_id, 102 void NavigateToGamedbEntryRequested(u64 program_id,
98 const CompatibilityList& compatibility_list); 103 const CompatibilityList& compatibility_list);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index cb9d7a863..5ed3b90b8 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -104,6 +104,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
104#include "input_common/main.h" 104#include "input_common/main.h"
105#include "util/overlay_dialog.h" 105#include "util/overlay_dialog.h"
106#include "video_core/gpu.h" 106#include "video_core/gpu.h"
107#include "video_core/renderer_base.h"
107#include "video_core/shader_notify.h" 108#include "video_core/shader_notify.h"
108#include "yuzu/about_dialog.h" 109#include "yuzu/about_dialog.h"
109#include "yuzu/bootmanager.h" 110#include "yuzu/bootmanager.h"
@@ -1426,8 +1427,12 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S
1426 title_name = Common::FS::PathToUTF8String( 1427 title_name = Common::FS::PathToUTF8String(
1427 std::filesystem::path{filename.toStdU16String()}.filename()); 1428 std::filesystem::path{filename.toStdU16String()}.filename());
1428 } 1429 }
1430 const bool is_64bit = system.Kernel().CurrentProcess()->Is64BitProcess();
1431 const auto instruction_set_suffix = is_64bit ? " (64-bit)" : " (32-bit)";
1432 title_name += instruction_set_suffix;
1429 LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version); 1433 LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version);
1430 UpdateWindowTitle(title_name, title_version); 1434 const auto gpu_vendor = system.GPU().Renderer().GetDeviceVendor();
1435 UpdateWindowTitle(title_name, title_version, gpu_vendor);
1431 1436
1432 loading_screen->Prepare(system.GetAppLoader()); 1437 loading_screen->Prepare(system.GetAppLoader());
1433 loading_screen->show(); 1438 loading_screen->show();
@@ -1881,7 +1886,8 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g
1881 } 1886 }
1882} 1887}
1883 1888
1884void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) { 1889void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path,
1890 DumpRomFSTarget target) {
1885 const auto failed = [this] { 1891 const auto failed = [this] {
1886 QMessageBox::warning(this, tr("RomFS Extraction Failed!"), 1892 QMessageBox::warning(this, tr("RomFS Extraction Failed!"),
1887 tr("There was an error copying the RomFS files or the user " 1893 tr("There was an error copying the RomFS files or the user "
@@ -1909,7 +1915,10 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
1909 return; 1915 return;
1910 } 1916 }
1911 1917
1912 const auto dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir); 1918 const auto dump_dir =
1919 target == DumpRomFSTarget::Normal
1920 ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)
1921 : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents";
1913 const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id); 1922 const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id);
1914 1923
1915 const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir); 1924 const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir);
@@ -1919,7 +1928,8 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
1919 if (*romfs_title_id == program_id) { 1928 if (*romfs_title_id == program_id) {
1920 const u64 ivfc_offset = loader->ReadRomFSIVFCOffset(); 1929 const u64 ivfc_offset = loader->ReadRomFSIVFCOffset();
1921 const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed}; 1930 const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed};
1922 romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program); 1931 romfs =
1932 pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program, nullptr, false);
1923 } else { 1933 } else {
1924 romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS(); 1934 romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS();
1925 } 1935 }
@@ -2858,8 +2868,8 @@ void GMainWindow::MigrateConfigFiles() {
2858 } 2868 }
2859} 2869}
2860 2870
2861void GMainWindow::UpdateWindowTitle(const std::string& title_name, 2871void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version,
2862 const std::string& title_version) { 2872 std::string_view gpu_vendor) {
2863 const auto branch_name = std::string(Common::g_scm_branch); 2873 const auto branch_name = std::string(Common::g_scm_branch);
2864 const auto description = std::string(Common::g_scm_desc); 2874 const auto description = std::string(Common::g_scm_desc);
2865 const auto build_id = std::string(Common::g_build_id); 2875 const auto build_id = std::string(Common::g_build_id);
@@ -2872,7 +2882,8 @@ void GMainWindow::UpdateWindowTitle(const std::string& title_name,
2872 if (title_name.empty()) { 2882 if (title_name.empty()) {
2873 setWindowTitle(QString::fromStdString(window_title)); 2883 setWindowTitle(QString::fromStdString(window_title));
2874 } else { 2884 } else {
2875 const auto run_title = fmt::format("{} | {} | {}", window_title, title_name, title_version); 2885 const auto run_title =
2886 fmt::format("{} | {} | {} | {}", window_title, title_name, title_version, gpu_vendor);
2876 setWindowTitle(QString::fromStdString(run_title)); 2887 setWindowTitle(QString::fromStdString(run_title));
2877 } 2888 }
2878} 2889}
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 11f152cbe..45c8310e1 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -34,6 +34,7 @@ class QProgressDialog;
34class WaitTreeWidget; 34class WaitTreeWidget;
35enum class GameListOpenTarget; 35enum class GameListOpenTarget;
36enum class GameListRemoveTarget; 36enum class GameListRemoveTarget;
37enum class DumpRomFSTarget;
37enum class InstalledEntryType; 38enum class InstalledEntryType;
38class GameListPlaceholder; 39class GameListPlaceholder;
39 40
@@ -244,7 +245,7 @@ private slots:
244 void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type); 245 void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type);
245 void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target, 246 void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target,
246 const std::string& game_path); 247 const std::string& game_path);
247 void OnGameListDumpRomFS(u64 program_id, const std::string& game_path); 248 void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target);
248 void OnGameListCopyTID(u64 program_id); 249 void OnGameListCopyTID(u64 program_id);
249 void OnGameListNavigateToGamedbEntry(u64 program_id, 250 void OnGameListNavigateToGamedbEntry(u64 program_id,
250 const CompatibilityList& compatibility_list); 251 const CompatibilityList& compatibility_list);
@@ -287,8 +288,8 @@ private:
287 InstallResult InstallNSPXCI(const QString& filename); 288 InstallResult InstallNSPXCI(const QString& filename);
288 InstallResult InstallNCA(const QString& filename); 289 InstallResult InstallNCA(const QString& filename);
289 void MigrateConfigFiles(); 290 void MigrateConfigFiles();
290 void UpdateWindowTitle(const std::string& title_name = {}, 291 void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {},
291 const std::string& title_version = {}); 292 std::string_view gpu_vendor = {});
292 void UpdateStatusBar(); 293 void UpdateStatusBar();
293 void UpdateStatusButtons(); 294 void UpdateStatusButtons();
294 void UpdateUISettings(); 295 void UpdateUISettings();