summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/CMakeLists.txt2
-rw-r--r--src/audio_core/command_generator.cpp357
-rw-r--r--src/audio_core/command_generator.h5
-rw-r--r--src/audio_core/common.h23
-rw-r--r--src/audio_core/delay_line.cpp104
-rw-r--r--src/audio_core/delay_line.h46
-rw-r--r--src/audio_core/effect_context.cpp22
-rw-r--r--src/audio_core/effect_context.h31
-rw-r--r--src/common/CMakeLists.txt3
-rw-r--r--src/common/uint128.cpp71
-rw-r--r--src/common/uint128.h89
-rw-r--r--src/common/wall_clock.cpp17
-rw-r--r--src/common/x64/native_clock.cpp58
-rw-r--r--src/core/CMakeLists.txt9
-rw-r--r--src/core/core_timing_util.cpp84
-rw-r--r--src/core/core_timing_util.h61
-rw-r--r--src/core/frontend/applets/controller.h1
-rw-r--r--src/core/hle/kernel/client_port.cpp4
-rw-r--r--src/core/hle/kernel/client_session.cpp4
-rw-r--r--src/core/hle/kernel/errors.h43
-rw-r--r--src/core/hle/kernel/handle_table.cpp10
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp2
-rw-r--r--src/core/hle/kernel/k_address_arbiter.cpp28
-rw-r--r--src/core/hle/kernel/k_condition_variable.cpp20
-rw-r--r--src/core/hle/kernel/k_readable_event.cpp3
-rw-r--r--src/core/hle/kernel/k_resource_limit.cpp2
-rw-r--r--src/core/hle/kernel/k_scoped_resource_reservation.h67
-rw-r--r--src/core/hle/kernel/k_synchronization_object.cpp8
-rw-r--r--src/core/hle/kernel/k_thread.cpp30
-rw-r--r--src/core/hle/kernel/kernel.cpp17
-rw-r--r--src/core/hle/kernel/memory/memory_manager.cpp6
-rw-r--r--src/core/hle/kernel/memory/page_table.cpp85
-rw-r--r--src/core/hle/kernel/process.cpp39
-rw-r--r--src/core/hle/kernel/process_capability.cpp34
-rw-r--r--src/core/hle/kernel/server_port.cpp4
-rw-r--r--src/core/hle/kernel/session.cpp11
-rw-r--r--src/core/hle/kernel/shared_memory.cpp11
-rw-r--r--src/core/hle/kernel/svc.cpp244
-rw-r--r--src/core/hle/kernel/svc_results.h21
-rw-r--r--src/core/hle/kernel/transfer_memory.cpp2
-rw-r--r--src/core/hle/service/am/am.cpp13
-rw-r--r--src/core/hle/service/am/applets/controller.cpp3
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp4
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp63
-rw-r--r--src/core/hle/service/hid/controllers/npad.h26
-rw-r--r--src/core/hle/service/ldn/errors.h13
-rw-r--r--src/core/hle/service/ldn/ldn.cpp36
-rw-r--r--src/core/hle/service/ldr/ldr.cpp6
-rw-r--r--src/core/hle/service/nfp/nfp.cpp2
-rw-r--r--src/core/hle/service/sockets/bsd.cpp6
-rw-r--r--src/core/settings.h5
-rw-r--r--src/input_common/mouse/mouse_input.cpp48
-rw-r--r--src/input_common/mouse/mouse_input.h7
-rw-r--r--src/input_common/mouse/mouse_poller.cpp3
-rw-r--r--src/input_common/sdl/sdl_impl.cpp7
-rw-r--r--src/input_common/settings.h1
-rw-r--r--src/tests/video_core/buffer_base.cpp76
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/buffer_cache/buffer_base.h217
-rw-r--r--src/video_core/buffer_cache/buffer_block.h62
-rw-r--r--src/video_core/buffer_cache/buffer_cache.cpp13
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h1656
-rw-r--r--src/video_core/buffer_cache/map_interval.cpp33
-rw-r--r--src/video_core/buffer_cache/map_interval.h93
-rw-r--r--src/video_core/command_classes/vic.cpp3
-rw-r--r--src/video_core/dirty_flags.cpp29
-rw-r--r--src/video_core/dirty_flags.h8
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/fermi_2d.cpp4
-rw-r--r--src/video_core/engines/fermi_2d.h2
-rw-r--r--src/video_core/engines/kepler_compute.cpp5
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/kepler_memory.cpp1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp21
-rw-r--r--src/video_core/engines/maxwell_3d.h14
-rw-r--r--src/video_core/engines/maxwell_dma.cpp3
-rw-r--r--src/video_core/fence_manager.h4
-rw-r--r--src/video_core/gpu.cpp8
-rw-r--r--src/video_core/gpu.h1
-rw-r--r--src/video_core/gpu_thread.cpp12
-rw-r--r--src/video_core/gpu_thread.h8
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/vulkan_quad_array.comp28
-rw-r--r--src/video_core/host_shaders/vulkan_uint8.comp9
-rw-r--r--src/video_core/memory_manager.cpp4
-rw-r--r--src/video_core/memory_manager.h4
-rw-r--r--src/video_core/rasterizer_interface.h5
-rw-r--r--src/video_core/renderer_base.h17
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp232
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h160
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_device.h6
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h9
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp580
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h66
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp61
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h2
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp25
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h32
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp94
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h60
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp69
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h36
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h38
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp51
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h18
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp43
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h9
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp83
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h18
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp6
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp153
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h46
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp20
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp394
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h119
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp144
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h27
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp674
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h67
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h26
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp156
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h24
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp43
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp139
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h27
-rw-r--r--src/video_core/shader/async_shaders.cpp1
-rw-r--r--src/video_core/shader/async_shaders.h9
-rw-r--r--src/video_core/shader/decode/other.cpp1
-rw-r--r--src/video_core/shader/shader_ir.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h58
-rw-r--r--src/video_core/video_core.cpp19
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp213
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h7
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp6
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp78
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h18
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp50
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h37
-rw-r--r--src/yuzu/CMakeLists.txt1
-rw-r--r--src/yuzu/applets/controller.cpp9
-rw-r--r--src/yuzu/bootmanager.cpp16
-rw-r--r--src/yuzu/configuration/config.cpp27
-rw-r--r--src/yuzu/configuration/config.h2
-rw-r--r--src/yuzu/configuration/configure_filesystem.cpp10
-rw-r--r--src/yuzu/configuration/configure_filesystem.h1
-rw-r--r--src/yuzu/configuration/configure_filesystem.ui35
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp22
-rw-r--r--src/yuzu/configuration/configure_input_advanced.cpp5
-rw-r--r--src/yuzu/configuration/configure_input_advanced.ui82
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp106
-rw-r--r--src/yuzu/configuration/configure_input_player.h6
-rw-r--r--src/yuzu/configuration/configure_input_player_widget.cpp248
-rw-r--r--src/yuzu/configuration/configure_input_player_widget.h6
-rw-r--r--src/yuzu/debugger/controller.cpp2
-rw-r--r--src/yuzu/main.cpp39
-rw-r--r--src/yuzu/main.ui8
-rw-r--r--src/yuzu/yuzu.qrc5
-rw-r--r--src/yuzu_cmd/CMakeLists.txt13
-rw-r--r--src/yuzu_cmd/config.cpp5
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp20
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.h3
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp2
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp2
-rw-r--r--src/yuzu_cmd/yuzu.cpp2
182 files changed, 5328 insertions, 4024 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d1d177b51..a0ae07752 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -15,6 +15,8 @@ add_library(audio_core STATIC
15 command_generator.cpp 15 command_generator.cpp
16 command_generator.h 16 command_generator.h
17 common.h 17 common.h
18 delay_line.cpp
19 delay_line.h
18 effect_context.cpp 20 effect_context.cpp
19 effect_context.h 21 effect_context.h
20 info_updater.cpp 22 info_updater.cpp
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 5b1065520..437cc5ccd 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cmath>
6#include <numbers>
5#include "audio_core/algorithm/interpolate.h" 7#include "audio_core/algorithm/interpolate.h"
6#include "audio_core/command_generator.h" 8#include "audio_core/command_generator.h"
7#include "audio_core/effect_context.h" 9#include "audio_core/effect_context.h"
@@ -13,6 +15,20 @@ namespace AudioCore {
13namespace { 15namespace {
14constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00; 16constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00;
15constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL; 17constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL;
18using DelayLineTimes = std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>;
19
20constexpr DelayLineTimes FDN_MIN_DELAY_LINE_TIMES{5.0f, 6.0f, 13.0f, 14.0f};
21constexpr DelayLineTimes FDN_MAX_DELAY_LINE_TIMES{45.704f, 82.782f, 149.94f, 271.58f};
22constexpr DelayLineTimes DECAY0_MAX_DELAY_LINE_TIMES{17.0f, 13.0f, 9.0f, 7.0f};
23constexpr DelayLineTimes DECAY1_MAX_DELAY_LINE_TIMES{19.0f, 11.0f, 10.0f, 6.0f};
24constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_TAP_TIMES{
25 0.017136f, 0.059154f, 0.161733f, 0.390186f, 0.425262f, 0.455411f, 0.689737f,
26 0.745910f, 0.833844f, 0.859502f, 0.000000f, 0.075024f, 0.168788f, 0.299901f,
27 0.337443f, 0.371903f, 0.599011f, 0.716741f, 0.817859f, 0.851664f};
28constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
29 0.67096f, 0.61027f, 1.0f, 0.35680f, 0.68361f, 0.65978f, 0.51939f,
30 0.24712f, 0.45945f, 0.45021f, 0.64196f, 0.54879f, 0.92925f, 0.38270f,
31 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
16 32
17template <std::size_t N> 33template <std::size_t N>
18void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { 34void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
@@ -65,6 +81,154 @@ s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
65 } 81 }
66} 82}
67 83
84float Pow10(float x) {
85 if (x >= 0.0f) {
86 return 1.0f;
87 } else if (x <= -5.3f) {
88 return 0.0f;
89 }
90 return std::pow(10.0f, x);
91}
92
93float SinD(float degrees) {
94 return std::sin(degrees * std::numbers::pi_v<float> / 180.0f);
95}
96
97float CosD(float degrees) {
98 return std::cos(degrees * std::numbers::pi_v<float> / 180.0f);
99}
100
101float ToFloat(s32 sample) {
102 return static_cast<float>(sample) / 65536.f;
103}
104
105s32 ToS32(float sample) {
106 constexpr auto min = -8388608.0f;
107 constexpr auto max = 8388607.f;
108 float rescaled_sample = sample * 65536.0f;
109 if (rescaled_sample < min) {
110 rescaled_sample = min;
111 }
112 if (rescaled_sample > max) {
113 rescaled_sample = max;
114 }
115 return static_cast<s32>(rescaled_sample);
116}
117
118constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_1CH{0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
119 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
120
121constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_2CH{0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
122 1, 1, 1, 0, 0, 0, 0, 1, 1, 1};
123
124constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_4CH{0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
125 1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
126
127constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 2, 2, 2,
128 1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
129
130template <std::size_t CHANNEL_COUNT>
131void ApplyReverbGeneric(I3dl2ReverbState& state,
132 const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
133 const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
134 s32 sample_count) {
135
136 auto GetTapLookup = []() {
137 if constexpr (CHANNEL_COUNT == 1) {
138 return REVERB_TAP_INDEX_1CH;
139 } else if constexpr (CHANNEL_COUNT == 2) {
140 return REVERB_TAP_INDEX_2CH;
141 } else if constexpr (CHANNEL_COUNT == 4) {
142 return REVERB_TAP_INDEX_4CH;
143 } else if constexpr (CHANNEL_COUNT == 6) {
144 return REVERB_TAP_INDEX_6CH;
145 }
146 };
147
148 const auto& tap_index_lut = GetTapLookup();
149 for (s32 sample = 0; sample < sample_count; sample++) {
150 std::array<f32, CHANNEL_COUNT> out_samples{};
151 std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fsamp{};
152 std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> mixed{};
153 std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> osamp{};
154
155 // Mix everything into a single sample
156 s32 temp_mixed_sample = 0;
157 for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
158 temp_mixed_sample += input[i][sample];
159 }
160 const auto current_sample = ToFloat(temp_mixed_sample);
161 const auto early_tap = state.early_delay_line.TapOut(state.early_to_late_taps);
162
163 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_TAPS; i++) {
164 const auto tapped_samp =
165 state.early_delay_line.TapOut(state.early_tap_steps[i]) * EARLY_GAIN[i];
166 out_samples[tap_index_lut[i]] += tapped_samp;
167
168 if constexpr (CHANNEL_COUNT == 6) {
169 // handle lfe
170 out_samples[5] += tapped_samp;
171 }
172 }
173
174 state.lowpass_0 = current_sample * state.lowpass_2 + state.lowpass_0 * state.lowpass_1;
175 state.early_delay_line.Tick(state.lowpass_0);
176
177 for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
178 out_samples[i] *= state.early_gain;
179 }
180
181 // Two channel seems to apply a latet gain, we require to save this
182 f32 filter{};
183 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
184 filter = state.fdn_delay_line[i].GetOutputSample();
185 const auto computed = filter * state.lpf_coefficients[0][i] + state.shelf_filter[i];
186 state.shelf_filter[i] =
187 filter * state.lpf_coefficients[1][i] + computed * state.lpf_coefficients[2][i];
188 fsamp[i] = computed;
189 }
190
191 // Mixing matrix
192 mixed[0] = fsamp[1] + fsamp[2];
193 mixed[1] = -fsamp[0] - fsamp[3];
194 mixed[2] = fsamp[0] - fsamp[3];
195 mixed[3] = fsamp[1] - fsamp[2];
196
197 if constexpr (CHANNEL_COUNT == 2) {
198 for (auto& mix : mixed) {
199 mix *= (filter * state.late_gain);
200 }
201 }
202
203 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
204 const auto late = early_tap * state.late_gain;
205 osamp[i] = state.decay_delay_line0[i].Tick(late + mixed[i]);
206 osamp[i] = state.decay_delay_line1[i].Tick(osamp[i]);
207 state.fdn_delay_line[i].Tick(osamp[i]);
208 }
209
210 if constexpr (CHANNEL_COUNT == 1) {
211 output[0][sample] = ToS32(state.dry_gain * ToFloat(input[0][sample]) +
212 (out_samples[0] + osamp[0] + osamp[1]));
213 } else if constexpr (CHANNEL_COUNT == 2 || CHANNEL_COUNT == 4) {
214 for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
215 output[i][sample] =
216 ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
217 }
218 } else if constexpr (CHANNEL_COUNT == 6) {
219 const auto temp_center = state.center_delay_line.Tick(0.5f * (osamp[2] - osamp[3]));
220 for (std::size_t i = 0; i < 4; i++) {
221 output[i][sample] =
222 ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
223 }
224 output[4][sample] =
225 ToS32(state.dry_gain * ToFloat(input[4][sample]) + (out_samples[4] + temp_center));
226 output[5][sample] =
227 ToS32(state.dry_gain * ToFloat(input[5][sample]) + (out_samples[5] + osamp[3]));
228 }
229 }
230}
231
68} // namespace 232} // namespace
69 233
70CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_, 234CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_,
@@ -271,11 +435,10 @@ void CommandGenerator::GenerateBiquadFilterCommandForVoice(ServerVoiceInfo& voic
271 } 435 }
272 436
273 // Generate biquad filter 437 // Generate biquad filter
274 // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter, 438 // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
275 // dsp_state.biquad_filter_state, 439 // dsp_state.biquad_filter_state,
276 // mix_buffer_count + channel, mix_buffer_count + 440 // mix_buffer_count + channel, mix_buffer_count + channel,
277 // channel, worker_params.sample_count, 441 // worker_params.sample_count, voice_info.GetInParams().node_id);
278 // voice_info.GetInParams().node_id);
279 } 442 }
280} 443}
281 444
@@ -376,21 +539,54 @@ void CommandGenerator::GenerateEffectCommand(ServerMixInfo& mix_info) {
376 539
377void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info, 540void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info,
378 bool enabled) { 541 bool enabled) {
379 if (!enabled) { 542 auto* reverb = dynamic_cast<EffectI3dl2Reverb*>(info);
543 const auto& params = reverb->GetParams();
544 auto& state = reverb->GetState();
545 const auto channel_count = params.channel_count;
546
547 if (channel_count != 1 && channel_count != 2 && channel_count != 4 && channel_count != 6) {
380 return; 548 return;
381 } 549 }
382 const auto& params = dynamic_cast<EffectI3dl2Reverb*>(info)->GetParams(); 550
383 const auto channel_count = params.channel_count; 551 std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
552 std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
553
554 const auto status = params.status;
384 for (s32 i = 0; i < channel_count; i++) { 555 for (s32 i = 0; i < channel_count; i++) {
385 // TODO(ogniK): Actually implement reverb 556 input[i] = GetMixBuffer(mix_buffer_offset + params.input[i]);
386 /* 557 output[i] = GetMixBuffer(mix_buffer_offset + params.output[i]);
387 if (params.input[i] != params.output[i]) { 558 }
388 const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); 559
389 auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); 560 if (enabled) {
390 ApplyMix<1>(output, input, 32768, worker_params.sample_count); 561 if (status == ParameterStatus::Initialized) {
391 }*/ 562 InitializeI3dl2Reverb(reverb->GetParams(), state, info->GetWorkBuffer());
392 auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); 563 } else if (status == ParameterStatus::Updating) {
393 std::memset(output, 0, worker_params.sample_count * sizeof(s32)); 564 UpdateI3dl2Reverb(reverb->GetParams(), state, false);
565 }
566 }
567
568 if (enabled) {
569 switch (channel_count) {
570 case 1:
571 ApplyReverbGeneric<1>(state, input, output, worker_params.sample_count);
572 break;
573 case 2:
574 ApplyReverbGeneric<2>(state, input, output, worker_params.sample_count);
575 break;
576 case 4:
577 ApplyReverbGeneric<4>(state, input, output, worker_params.sample_count);
578 break;
579 case 6:
580 ApplyReverbGeneric<6>(state, input, output, worker_params.sample_count);
581 break;
582 }
583 } else {
584 for (s32 i = 0; i < channel_count; i++) {
585 // Only copy if the buffer input and output do not match!
586 if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
587 std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
588 }
589 }
394 } 590 }
395} 591}
396 592
@@ -528,6 +724,133 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
528 return sample_count; 724 return sample_count;
529} 725}
530 726
727void CommandGenerator::InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
728 std::vector<u8>& work_buffer) {
729 // Reset state
730 state.lowpass_0 = 0.0f;
731 state.lowpass_1 = 0.0f;
732 state.lowpass_2 = 0.0f;
733
734 state.early_delay_line.Reset();
735 state.early_tap_steps.fill(0);
736 state.early_gain = 0.0f;
737 state.late_gain = 0.0f;
738 state.early_to_late_taps = 0;
739 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
740 state.fdn_delay_line[i].Reset();
741 state.decay_delay_line0[i].Reset();
742 state.decay_delay_line1[i].Reset();
743 }
744 state.last_reverb_echo = 0.0f;
745 state.center_delay_line.Reset();
746 for (auto& coef : state.lpf_coefficients) {
747 coef.fill(0.0f);
748 }
749 state.shelf_filter.fill(0.0f);
750 state.dry_gain = 0.0f;
751
752 const auto sample_rate = info.sample_rate / 1000;
753 f32* work_buffer_ptr = reinterpret_cast<f32*>(work_buffer.data());
754
755 s32 delay_samples{};
756 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
757 delay_samples =
758 AudioCommon::CalculateDelaySamples(sample_rate, FDN_MAX_DELAY_LINE_TIMES[i]);
759 state.fdn_delay_line[i].Initialize(delay_samples, work_buffer_ptr);
760 work_buffer_ptr += delay_samples + 1;
761
762 delay_samples =
763 AudioCommon::CalculateDelaySamples(sample_rate, DECAY0_MAX_DELAY_LINE_TIMES[i]);
764 state.decay_delay_line0[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
765 work_buffer_ptr += delay_samples + 1;
766
767 delay_samples =
768 AudioCommon::CalculateDelaySamples(sample_rate, DECAY1_MAX_DELAY_LINE_TIMES[i]);
769 state.decay_delay_line1[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
770 work_buffer_ptr += delay_samples + 1;
771 }
772 delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 5.0f);
773 state.center_delay_line.Initialize(delay_samples, work_buffer_ptr);
774 work_buffer_ptr += delay_samples + 1;
775
776 delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 400.0f);
777 state.early_delay_line.Initialize(delay_samples, work_buffer_ptr);
778
779 UpdateI3dl2Reverb(info, state, true);
780}
781
782void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
783 bool should_clear) {
784
785 state.dry_gain = info.dry_gain;
786 state.shelf_filter.fill(0.0f);
787 state.lowpass_0 = 0.0f;
788 state.early_gain = Pow10(std::min(info.room + info.reflection, 5000.0f) / 2000.0f);
789 state.late_gain = Pow10(std::min(info.room + info.reverb, 5000.0f) / 2000.0f);
790
791 const auto sample_rate = info.sample_rate / 1000;
792 const f32 hf_gain = Pow10(info.room_hf / 2000.0f);
793 if (hf_gain >= 1.0f) {
794 state.lowpass_2 = 1.0f;
795 state.lowpass_1 = 0.0f;
796 } else {
797 const auto a = 1.0f - hf_gain;
798 const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
799 static_cast<f32>(info.sample_rate)));
800 const auto c = std::sqrt(b * b - 4.0f * a * a);
801
802 state.lowpass_1 = (b - c) / (2.0f * a);
803 state.lowpass_2 = 1.0f - state.lowpass_1;
804 }
805 state.early_to_late_taps = AudioCommon::CalculateDelaySamples(
806 sample_rate, 1000.0f * (info.reflection_delay + info.reverb_delay));
807
808 state.last_reverb_echo = 0.6f * info.diffusion * 0.01f;
809 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
810 const auto length =
811 FDN_MIN_DELAY_LINE_TIMES[i] +
812 (info.density / 100.0f) * (FDN_MAX_DELAY_LINE_TIMES[i] - FDN_MIN_DELAY_LINE_TIMES[i]);
813 state.fdn_delay_line[i].SetDelay(AudioCommon::CalculateDelaySamples(sample_rate, length));
814
815 const auto delay_sample_counts = state.fdn_delay_line[i].GetDelay() +
816 state.decay_delay_line0[i].GetDelay() +
817 state.decay_delay_line1[i].GetDelay();
818
819 float a = (-60.0f * static_cast<f32>(delay_sample_counts)) /
820 (info.decay_time * static_cast<f32>(info.sample_rate));
821 float b = a / info.hf_decay_ratio;
822 float c = CosD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)) /
823 SinD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate));
824 float d = Pow10((b - a) / 40.0f);
825 float e = Pow10((b + a) / 40.0f) * 0.7071f;
826
827 state.lpf_coefficients[0][i] = e * ((d * c) + 1.0f) / (c + d);
828 state.lpf_coefficients[1][i] = e * (1.0f - (d * c)) / (c + d);
829 state.lpf_coefficients[2][i] = (c - d) / (c + d);
830
831 state.decay_delay_line0[i].SetCoefficient(state.last_reverb_echo);
832 state.decay_delay_line1[i].SetCoefficient(-0.9f * state.last_reverb_echo);
833 }
834
835 if (should_clear) {
836 for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
837 state.fdn_delay_line[i].Clear();
838 state.decay_delay_line0[i].Clear();
839 state.decay_delay_line1[i].Clear();
840 }
841 state.early_delay_line.Clear();
842 state.center_delay_line.Clear();
843 }
844
845 const auto max_early_delay = state.early_delay_line.GetMaxDelay();
846 const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
847 for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
848 const auto length = AudioCommon::CalculateDelaySamples(
849 sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
850 state.early_tap_steps[tap] = std::min(length, max_early_delay);
851 }
852}
853
531void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume, 854void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume,
532 s32 channel, s32 node_id) { 855 s32 channel, s32 node_id) {
533 const auto last = static_cast<s32>(last_volume * 32768.0f); 856 const auto last = static_cast<s32>(last_volume * 32768.0f);
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index b937350b1..2ebb755b0 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -21,6 +21,8 @@ class ServerMixInfo;
21class EffectContext; 21class EffectContext;
22class EffectBase; 22class EffectBase;
23struct AuxInfoDSP; 23struct AuxInfoDSP;
24struct I3dl2ReverbParams;
25struct I3dl2ReverbState;
24using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>; 26using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>;
25 27
26class CommandGenerator { 28class CommandGenerator {
@@ -80,6 +82,9 @@ private:
80 s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, 82 s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
81 u32 sample_count, u32 read_offset, u32 read_count); 83 u32 sample_count, u32 read_offset, u32 read_count);
82 84
85 void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
86 std::vector<u8>& work_buffer);
87 void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
83 // DSP Code 88 // DSP Code
84 s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, 89 s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
85 s32 channel, std::size_t mix_offset); 90 s32 channel, std::size_t mix_offset);
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index ec59a3ba9..fe546c55d 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -33,6 +33,29 @@ constexpr std::size_t TEMP_MIX_BASE_SIZE = 0x3f00; // TODO(ogniK): Work out this
33// and our const ends up being 0x3f04, the 4 bytes are most 33// and our const ends up being 0x3f04, the 4 bytes are most
34// likely the sample history 34// likely the sample history
35constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY; 35constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY;
36constexpr f32 I3DL2REVERB_MAX_LEVEL = 5000.0f;
37constexpr f32 I3DL2REVERB_MIN_REFLECTION_DURATION = 0.02f;
38constexpr std::size_t I3DL2REVERB_TAPS = 20;
39constexpr std::size_t I3DL2REVERB_DELAY_LINE_COUNT = 4;
40using Fractional = s32;
41
42template <typename T>
43constexpr Fractional ToFractional(T x) {
44 return static_cast<Fractional>(x * static_cast<T>(0x4000));
45}
46
47constexpr Fractional MultiplyFractional(Fractional lhs, Fractional rhs) {
48 return static_cast<Fractional>(static_cast<s64>(lhs) * rhs >> 14);
49}
50
51constexpr s32 FractionalToFixed(Fractional x) {
52 const auto s = x & (1 << 13);
53 return static_cast<s32>(x >> 14) + s;
54}
55
56constexpr s32 CalculateDelaySamples(s32 sample_rate_khz, float time) {
57 return FractionalToFixed(MultiplyFractional(ToFractional(sample_rate_khz), ToFractional(time)));
58}
36 59
37static constexpr u32 VersionFromRevision(u32_le rev) { 60static constexpr u32 VersionFromRevision(u32_le rev) {
38 // "REV7" -> 7 61 // "REV7" -> 7
diff --git a/src/audio_core/delay_line.cpp b/src/audio_core/delay_line.cpp
new file mode 100644
index 000000000..f4e4dd8d2
--- /dev/null
+++ b/src/audio_core/delay_line.cpp
@@ -0,0 +1,104 @@
1#include <cstring>
2#include "audio_core/delay_line.h"
3
4namespace AudioCore {
5DelayLineBase::DelayLineBase() = default;
6DelayLineBase::~DelayLineBase() = default;
7
8void DelayLineBase::Initialize(s32 max_delay_, float* src_buffer) {
9 buffer = src_buffer;
10 buffer_end = buffer + max_delay_;
11 max_delay = max_delay_;
12 output = buffer;
13 SetDelay(max_delay_);
14 Clear();
15}
16
17void DelayLineBase::SetDelay(s32 new_delay) {
18 if (max_delay < new_delay) {
19 return;
20 }
21 delay = new_delay;
22 input = (buffer + ((output - buffer) + new_delay) % (max_delay + 1));
23}
24
25s32 DelayLineBase::GetDelay() const {
26 return delay;
27}
28
29s32 DelayLineBase::GetMaxDelay() const {
30 return max_delay;
31}
32
33f32 DelayLineBase::TapOut(s32 last_sample) {
34 const float* ptr = input - (last_sample + 1);
35 if (ptr < buffer) {
36 ptr += (max_delay + 1);
37 }
38
39 return *ptr;
40}
41
42f32 DelayLineBase::Tick(f32 sample) {
43 *(input++) = sample;
44 const auto out_sample = *(output++);
45
46 if (buffer_end < input) {
47 input = buffer;
48 }
49
50 if (buffer_end < output) {
51 output = buffer;
52 }
53
54 return out_sample;
55}
56
57float* DelayLineBase::GetInput() {
58 return input;
59}
60
61const float* DelayLineBase::GetInput() const {
62 return input;
63}
64
65f32 DelayLineBase::GetOutputSample() const {
66 return *output;
67}
68
69void DelayLineBase::Clear() {
70 std::memset(buffer, 0, sizeof(float) * max_delay);
71}
72
73void DelayLineBase::Reset() {
74 buffer = nullptr;
75 buffer_end = nullptr;
76 max_delay = 0;
77 input = nullptr;
78 output = nullptr;
79 delay = 0;
80}
81
82DelayLineAllPass::DelayLineAllPass() = default;
83DelayLineAllPass::~DelayLineAllPass() = default;
84
85void DelayLineAllPass::Initialize(u32 delay_, float coeffcient_, f32* src_buffer) {
86 DelayLineBase::Initialize(delay_, src_buffer);
87 SetCoefficient(coeffcient_);
88}
89
90void DelayLineAllPass::SetCoefficient(float coeffcient_) {
91 coefficient = coeffcient_;
92}
93
94f32 DelayLineAllPass::Tick(f32 sample) {
95 const auto temp = sample - coefficient * *output;
96 return coefficient * temp + DelayLineBase::Tick(temp);
97}
98
99void DelayLineAllPass::Reset() {
100 coefficient = 0.0f;
101 DelayLineBase::Reset();
102}
103
104} // namespace AudioCore
diff --git a/src/audio_core/delay_line.h b/src/audio_core/delay_line.h
new file mode 100644
index 000000000..cafddd432
--- /dev/null
+++ b/src/audio_core/delay_line.h
@@ -0,0 +1,46 @@
1#pragma once
2
3#include "common/common_types.h"
4
5namespace AudioCore {
6
// Fixed-capacity, single-tap ring-buffer delay line over externally owned
// float storage. The ring holds (max_delay + 1) samples; `input` is the write
// head and `output` the read head, kept `delay` samples apart.
7class DelayLineBase {
8public:
9    DelayLineBase();
10    ~DelayLineBase();
11
     // Adopts `src_buffer` (not owned) as backing storage for delays of up to
     // max_delay_ samples.
12    void Initialize(s32 max_delay_, float* src_buffer);
     // Sets the current delay length; values above max_delay are ignored.
13    void SetDelay(s32 new_delay);
14    s32 GetDelay() const;
15    s32 GetMaxDelay() const;
     // Reads a sample written (last_sample + 1) ticks ago, without advancing.
16    f32 TapOut(s32 last_sample);
     // Writes one sample, returns the delayed sample, advances both heads.
17    f32 Tick(f32 sample);
18    float* GetInput();
19    const float* GetInput() const;
     // Peeks at the delayed sample without advancing the read head.
20    f32 GetOutputSample() const;
     // Zeroes the buffer contents (state retained).
21    void Clear();
     // Drops the buffer reference and zeroes all state.
22    void Reset();
23
24protected:
25    float* buffer{nullptr};     // externally owned storage (not freed here)
26    float* buffer_end{nullptr}; // last valid slot of the ring
27    s32 max_delay{};
28    float* input{nullptr};      // write head
29    float* output{nullptr};     // read head
30    s32 delay{};
31};
32
33class DelayLineAllPass final : public DelayLineBase {
34public:
35 DelayLineAllPass();
36 ~DelayLineAllPass();
37
38 void Initialize(u32 delay, float coeffcient_, f32* src_buffer);
39 void SetCoefficient(float coeffcient_);
40 f32 Tick(f32 sample);
41 void Reset();
42
43private:
44 float coefficient{};
45};
46} // namespace AudioCore
diff --git a/src/audio_core/effect_context.cpp b/src/audio_core/effect_context.cpp
index f770b9608..89e4573c7 100644
--- a/src/audio_core/effect_context.cpp
+++ b/src/audio_core/effect_context.cpp
@@ -90,6 +90,14 @@ s32 EffectBase::GetProcessingOrder() const {
90 return processing_order; 90 return processing_order;
91} 91}
92 92
93std::vector<u8>& EffectBase::GetWorkBuffer() {
94 return work_buffer;
95}
96
97const std::vector<u8>& EffectBase::GetWorkBuffer() const {
98 return work_buffer;
99}
100
93EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {} 101EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {}
94EffectI3dl2Reverb::~EffectI3dl2Reverb() = default; 102EffectI3dl2Reverb::~EffectI3dl2Reverb() = default;
95 103
@@ -117,6 +125,12 @@ void EffectI3dl2Reverb::Update(EffectInfo::InParams& in_params) {
117 usage = UsageState::Initialized; 125 usage = UsageState::Initialized;
118 params.status = ParameterStatus::Initialized; 126 params.status = ParameterStatus::Initialized;
119 skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0; 127 skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0;
128 if (!skipped) {
129 auto& cur_work_buffer = GetWorkBuffer();
130 // Has two buffers internally
131 cur_work_buffer.resize(in_params.buffer_size * 2);
132 std::fill(cur_work_buffer.begin(), cur_work_buffer.end(), 0);
133 }
120 } 134 }
121} 135}
122 136
@@ -129,6 +143,14 @@ void EffectI3dl2Reverb::UpdateForCommandGeneration() {
129 GetParams().status = ParameterStatus::Updated; 143 GetParams().status = ParameterStatus::Updated;
130} 144}
131 145
146I3dl2ReverbState& EffectI3dl2Reverb::GetState() {
147 return state;
148}
149
150const I3dl2ReverbState& EffectI3dl2Reverb::GetState() const {
151 return state;
152}
153
132EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {} 154EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {}
133EffectBiquadFilter::~EffectBiquadFilter() = default; 155EffectBiquadFilter::~EffectBiquadFilter() = default;
134 156
diff --git a/src/audio_core/effect_context.h b/src/audio_core/effect_context.h
index c5e0b398c..5e0655dd7 100644
--- a/src/audio_core/effect_context.h
+++ b/src/audio_core/effect_context.h
@@ -8,6 +8,7 @@
8#include <memory> 8#include <memory>
9#include <vector> 9#include <vector>
10#include "audio_core/common.h" 10#include "audio_core/common.h"
11#include "audio_core/delay_line.h"
11#include "common/common_funcs.h" 12#include "common/common_funcs.h"
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "common/swap.h" 14#include "common/swap.h"
@@ -194,6 +195,8 @@ public:
194 [[nodiscard]] bool IsEnabled() const; 195 [[nodiscard]] bool IsEnabled() const;
195 [[nodiscard]] s32 GetMixID() const; 196 [[nodiscard]] s32 GetMixID() const;
196 [[nodiscard]] s32 GetProcessingOrder() const; 197 [[nodiscard]] s32 GetProcessingOrder() const;
198 [[nodiscard]] std::vector<u8>& GetWorkBuffer();
199 [[nodiscard]] const std::vector<u8>& GetWorkBuffer() const;
197 200
198protected: 201protected:
199 UsageState usage{UsageState::Invalid}; 202 UsageState usage{UsageState::Invalid};
@@ -201,6 +204,7 @@ protected:
201 s32 mix_id{}; 204 s32 mix_id{};
202 s32 processing_order{}; 205 s32 processing_order{};
203 bool enabled = false; 206 bool enabled = false;
207 std::vector<u8> work_buffer{};
204}; 208};
205 209
206template <typename T> 210template <typename T>
@@ -212,7 +216,7 @@ public:
212 return internal_params; 216 return internal_params;
213 } 217 }
214 218
215 const I3dl2ReverbParams& GetParams() const { 219 const T& GetParams() const {
216 return internal_params; 220 return internal_params;
217 } 221 }
218 222
@@ -229,6 +233,27 @@ public:
229 void UpdateForCommandGeneration() override; 233 void UpdateForCommandGeneration() override;
230}; 234};
231 235
236struct I3dl2ReverbState {
237 f32 lowpass_0{};
238 f32 lowpass_1{};
239 f32 lowpass_2{};
240
241 DelayLineBase early_delay_line{};
242 std::array<u32, AudioCommon::I3DL2REVERB_TAPS> early_tap_steps{};
243 f32 early_gain{};
244 f32 late_gain{};
245
246 u32 early_to_late_taps{};
247 std::array<DelayLineBase, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fdn_delay_line{};
248 std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line0{};
249 std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line1{};
250 f32 last_reverb_echo{};
251 DelayLineBase center_delay_line{};
252 std::array<std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>, 3> lpf_coefficients{};
253 std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> shelf_filter{};
254 f32 dry_gain{};
255};
256
232class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> { 257class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> {
233public: 258public:
234 explicit EffectI3dl2Reverb(); 259 explicit EffectI3dl2Reverb();
@@ -237,8 +262,12 @@ public:
237 void Update(EffectInfo::InParams& in_params) override; 262 void Update(EffectInfo::InParams& in_params) override;
238 void UpdateForCommandGeneration() override; 263 void UpdateForCommandGeneration() override;
239 264
265 I3dl2ReverbState& GetState();
266 const I3dl2ReverbState& GetState() const;
267
240private: 268private:
241 bool skipped = false; 269 bool skipped = false;
270 I3dl2ReverbState state{};
242}; 271};
243 272
244class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> { 273class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> {
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bfd11e76d..b657506b1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -168,7 +168,6 @@ add_library(common STATIC
168 time_zone.cpp 168 time_zone.cpp
169 time_zone.h 169 time_zone.h
170 tree.h 170 tree.h
171 uint128.cpp
172 uint128.h 171 uint128.h
173 uuid.cpp 172 uuid.cpp
174 uuid.h 173 uuid.h
@@ -206,6 +205,8 @@ if (MSVC)
206else() 205else()
207 target_compile_options(common PRIVATE 206 target_compile_options(common PRIVATE
208 -Werror 207 -Werror
208
209 $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
209 ) 210 )
210endif() 211endif()
211 212
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
deleted file mode 100644
index 16bf7c828..000000000
--- a/src/common/uint128.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef _MSC_VER
6#include <intrin.h>
7
8#pragma intrinsic(_umul128)
9#pragma intrinsic(_udiv128)
10#endif
11#include <cstring>
12#include "common/uint128.h"
13
14namespace Common {
15
16#ifdef _MSC_VER
17
18u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
19 u128 r{};
20 r[0] = _umul128(a, b, &r[1]);
21 u64 remainder;
22#if _MSC_VER < 1923
23 return udiv128(r[1], r[0], d, &remainder);
24#else
25 return _udiv128(r[1], r[0], d, &remainder);
26#endif
27}
28
29#else
30
31u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
32 const u64 diva = a / d;
33 const u64 moda = a % d;
34 const u64 divb = b / d;
35 const u64 modb = b % d;
36 return diva * b + moda * divb + moda * modb / d;
37}
38
39#endif
40
41u128 Multiply64Into128(u64 a, u64 b) {
42 u128 result;
43#ifdef _MSC_VER
44 result[0] = _umul128(a, b, &result[1]);
45#else
46 unsigned __int128 tmp = a;
47 tmp *= b;
48 std::memcpy(&result, &tmp, sizeof(u128));
49#endif
50 return result;
51}
52
53std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
54 u64 remainder = dividend[0] % divisor;
55 u64 accum = dividend[0] / divisor;
56 if (dividend[1] == 0)
57 return {accum, remainder};
58 // We ignore dividend[1] / divisor as that overflows
59 const u64 first_segment = (dividend[1] % divisor) << 32;
60 accum += (first_segment / divisor) << 32;
61 const u64 second_segment = (first_segment % divisor) << 32;
62 accum += (second_segment / divisor);
63 remainder += second_segment % divisor;
64 if (remainder >= divisor) {
65 accum++;
66 remainder -= divisor;
67 }
68 return {accum, remainder};
69}
70
71} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
index 969259ab6..83560a9ce 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -4,19 +4,98 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstring>
7#include <utility> 8#include <utility>
9
10#ifdef _MSC_VER
11#include <intrin.h>
12#pragma intrinsic(__umulh)
13#pragma intrinsic(_umul128)
14#pragma intrinsic(_udiv128)
15#else
16#include <x86intrin.h>
17#endif
18
8#include "common/common_types.h" 19#include "common/common_types.h"
9 20
10namespace Common { 21namespace Common {
11 22
12// This function multiplies 2 u64 values and divides it by a u64 value. 23// This function multiplies 2 u64 values and divides it by a u64 value.
13[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); 24[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
25#ifdef _MSC_VER
26 u128 r{};
27 r[0] = _umul128(a, b, &r[1]);
28 u64 remainder;
29#if _MSC_VER < 1923
30 return udiv128(r[1], r[0], d, &remainder);
31#else
32 return _udiv128(r[1], r[0], d, &remainder);
33#endif
34#else
35 const u64 diva = a / d;
36 const u64 moda = a % d;
37 const u64 divb = b / d;
38 const u64 modb = b % d;
39 return diva * b + moda * divb + moda * modb / d;
40#endif
41}
14 42
15// This function multiplies 2 u64 values and produces a u128 value; 43// This function multiplies 2 u64 values and produces a u128 value;
16[[nodiscard]] u128 Multiply64Into128(u64 a, u64 b); 44[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
45 u128 result;
46#ifdef _MSC_VER
47 result[0] = _umul128(a, b, &result[1]);
48#else
49 unsigned __int128 tmp = a;
50 tmp *= b;
51 std::memcpy(&result, &tmp, sizeof(u128));
52#endif
53 return result;
54}
55
56[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
57#ifdef __SIZEOF_INT128__
58 const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
59 return static_cast<u64>(base / divisor);
60#elif defined(_M_X64) || defined(_M_ARM64)
61 std::array<u64, 2> r = {0, numerator};
62 u64 remainder;
63#if _MSC_VER < 1923
64 return udiv128(r[1], r[0], divisor, &remainder);
65#else
66 return _udiv128(r[1], r[0], divisor, &remainder);
67#endif
68#else
69 // This one is bit more inaccurate.
70 return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
71#endif
72}
73
74[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
75#ifdef __SIZEOF_INT128__
76 return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
77#elif defined(_M_X64) || defined(_M_ARM64)
78 return __umulh(a, b); // MSVC
79#else
80 // Generic fallback
81 const u64 a_lo = u32(a);
82 const u64 a_hi = a >> 32;
83 const u64 b_lo = u32(b);
84 const u64 b_hi = b >> 32;
85
86 const u64 a_x_b_hi = a_hi * b_hi;
87 const u64 a_x_b_mid = a_hi * b_lo;
88 const u64 b_x_a_mid = b_hi * a_lo;
89 const u64 a_x_b_lo = a_lo * b_lo;
90
91 const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
92 static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
93 32;
94
95 const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
17 96
18// This function divides a u128 by a u32 value and produces two u64 values: 97 return multhi;
19// the result of division and the remainder 98#endif
20[[nodiscard]] std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor); 99}
21 100
22} // namespace Common 101} // namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index a8c143f85..1545993bd 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstdint>
6
5#include "common/uint128.h" 7#include "common/uint128.h"
6#include "common/wall_clock.h" 8#include "common/wall_clock.h"
7 9
@@ -18,7 +20,9 @@ using base_time_point = std::chrono::time_point<base_timer>;
18class StandardWallClock final : public WallClock { 20class StandardWallClock final : public WallClock {
19public: 21public:
20 explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) 22 explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
21 : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) { 23 : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false),
24 emulated_clock_factor{GetFixedPoint64Factor(emulated_clock_frequency, 1000000000)},
25 emulated_cpu_factor{GetFixedPoint64Factor(emulated_cpu_frequency, 1000000000)} {
22 start_time = base_timer::now(); 26 start_time = base_timer::now();
23 } 27 }
24 28
@@ -41,16 +45,11 @@ public:
41 } 45 }
42 46
43 u64 GetClockCycles() override { 47 u64 GetClockCycles() override {
44 std::chrono::nanoseconds time_now = GetTimeNS(); 48 return MultiplyHigh(GetTimeNS().count(), emulated_clock_factor);
45 const u128 temporary =
46 Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
47 return Common::Divide128On32(temporary, 1000000000).first;
48 } 49 }
49 50
50 u64 GetCPUCycles() override { 51 u64 GetCPUCycles() override {
51 std::chrono::nanoseconds time_now = GetTimeNS(); 52 return MultiplyHigh(GetTimeNS().count(), emulated_cpu_factor);
52 const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
53 return Common::Divide128On32(temporary, 1000000000).first;
54 } 53 }
55 54
56 void Pause([[maybe_unused]] bool is_paused) override { 55 void Pause([[maybe_unused]] bool is_paused) override {
@@ -59,6 +58,8 @@ public:
59 58
60private: 59private:
61 base_time_point start_time; 60 base_time_point start_time;
61 const u64 emulated_clock_factor;
62 const u64 emulated_cpu_factor;
62}; 63};
63 64
64#ifdef ARCHITECTURE_x86_64 65#ifdef ARCHITECTURE_x86_64
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index a65f6b832..87de40624 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -8,68 +8,10 @@
8#include <mutex> 8#include <mutex>
9#include <thread> 9#include <thread>
10 10
11#ifdef _MSC_VER
12#include <intrin.h>
13
14#pragma intrinsic(__umulh)
15#pragma intrinsic(_udiv128)
16#else
17#include <x86intrin.h>
18#endif
19
20#include "common/atomic_ops.h" 11#include "common/atomic_ops.h"
21#include "common/uint128.h" 12#include "common/uint128.h"
22#include "common/x64/native_clock.h" 13#include "common/x64/native_clock.h"
23 14
24namespace {
25
26[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
27#ifdef __SIZEOF_INT128__
28 const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
29 return static_cast<u64>(base / divisor);
30#elif defined(_M_X64) || defined(_M_ARM64)
31 std::array<u64, 2> r = {0, numerator};
32 u64 remainder;
33#if _MSC_VER < 1923
34 return udiv128(r[1], r[0], divisor, &remainder);
35#else
36 return _udiv128(r[1], r[0], divisor, &remainder);
37#endif
38#else
39 // This one is bit more inaccurate.
40 return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
41#endif
42}
43
44[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
45#ifdef __SIZEOF_INT128__
46 return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
47#elif defined(_M_X64) || defined(_M_ARM64)
48 return __umulh(a, b); // MSVC
49#else
50 // Generic fallback
51 const u64 a_lo = u32(a);
52 const u64 a_hi = a >> 32;
53 const u64 b_lo = u32(b);
54 const u64 b_hi = b >> 32;
55
56 const u64 a_x_b_hi = a_hi * b_hi;
57 const u64 a_x_b_mid = a_hi * b_lo;
58 const u64 b_x_a_mid = b_hi * a_lo;
59 const u64 a_x_b_lo = a_lo * b_lo;
60
61 const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
62 static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
63 32;
64
65 const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
66
67 return multhi;
68#endif
69}
70
71} // namespace
72
73namespace Common { 15namespace Common {
74 16
75u64 EstimateRDTSCFrequency() { 17u64 EstimateRDTSCFrequency() {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 386d7bddf..c6bdf72ec 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -19,7 +19,6 @@ add_library(core STATIC
19 core.h 19 core.h
20 core_timing.cpp 20 core_timing.cpp
21 core_timing.h 21 core_timing.h
22 core_timing_util.cpp
23 core_timing_util.h 22 core_timing_util.h
24 cpu_manager.cpp 23 cpu_manager.cpp
25 cpu_manager.h 24 cpu_manager.h
@@ -148,7 +147,7 @@ add_library(core STATIC
148 hle/kernel/client_session.h 147 hle/kernel/client_session.h
149 hle/kernel/code_set.cpp 148 hle/kernel/code_set.cpp
150 hle/kernel/code_set.h 149 hle/kernel/code_set.h
151 hle/kernel/errors.h 150 hle/kernel/svc_results.h
152 hle/kernel/global_scheduler_context.cpp 151 hle/kernel/global_scheduler_context.cpp
153 hle/kernel/global_scheduler_context.h 152 hle/kernel/global_scheduler_context.h
154 hle/kernel/handle_table.cpp 153 hle/kernel/handle_table.cpp
@@ -174,6 +173,7 @@ add_library(core STATIC
174 hle/kernel/k_scheduler.h 173 hle/kernel/k_scheduler.h
175 hle/kernel/k_scheduler_lock.h 174 hle/kernel/k_scheduler_lock.h
176 hle/kernel/k_scoped_lock.h 175 hle/kernel/k_scoped_lock.h
176 hle/kernel/k_scoped_resource_reservation.h
177 hle/kernel/k_scoped_scheduler_lock_and_sleep.h 177 hle/kernel/k_scoped_scheduler_lock_and_sleep.h
178 hle/kernel/k_synchronization_object.cpp 178 hle/kernel/k_synchronization_object.cpp
179 hle/kernel/k_synchronization_object.h 179 hle/kernel/k_synchronization_object.h
@@ -223,7 +223,6 @@ add_library(core STATIC
223 hle/kernel/svc.cpp 223 hle/kernel/svc.cpp
224 hle/kernel/svc.h 224 hle/kernel/svc.h
225 hle/kernel/svc_common.h 225 hle/kernel/svc_common.h
226 hle/kernel/svc_results.h
227 hle/kernel/svc_types.h 226 hle/kernel/svc_types.h
228 hle/kernel/svc_wrap.h 227 hle/kernel/svc_wrap.h
229 hle/kernel/time_manager.cpp 228 hle/kernel/time_manager.cpp
@@ -266,6 +265,7 @@ add_library(core STATIC
266 hle/service/am/applets/software_keyboard.h 265 hle/service/am/applets/software_keyboard.h
267 hle/service/am/applets/web_browser.cpp 266 hle/service/am/applets/web_browser.cpp
268 hle/service/am/applets/web_browser.h 267 hle/service/am/applets/web_browser.h
268 hle/service/am/applets/web_types.h
269 hle/service/am/idle.cpp 269 hle/service/am/idle.cpp
270 hle/service/am/idle.h 270 hle/service/am/idle.h
271 hle/service/am/omm.cpp 271 hle/service/am/omm.cpp
@@ -400,6 +400,7 @@ add_library(core STATIC
400 hle/service/hid/controllers/xpad.h 400 hle/service/hid/controllers/xpad.h
401 hle/service/lbl/lbl.cpp 401 hle/service/lbl/lbl.cpp
402 hle/service/lbl/lbl.h 402 hle/service/lbl/lbl.h
403 hle/service/ldn/errors.h
403 hle/service/ldn/ldn.cpp 404 hle/service/ldn/ldn.cpp
404 hle/service/ldn/ldn.h 405 hle/service/ldn/ldn.h
405 hle/service/ldr/ldr.cpp 406 hle/service/ldr/ldr.cpp
@@ -653,6 +654,8 @@ else()
653 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> 654 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
654 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> 655 $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
655 656
657 $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
658
656 -Wno-sign-conversion 659 -Wno-sign-conversion
657 ) 660 )
658endif() 661endif()
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
deleted file mode 100644
index 8ce8e602e..000000000
--- a/src/core/core_timing_util.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
1// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
2// Licensed under GPLv2+
3// Refer to the license.txt file included.
4
5#include "core/core_timing_util.h"
6
7#include <cinttypes>
8#include <limits>
9#include "common/logging/log.h"
10#include "common/uint128.h"
11#include "core/hardware_properties.h"
12
13namespace Core::Timing {
14
15constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;
16
17s64 msToCycles(std::chrono::milliseconds ms) {
18 if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
19 LOG_ERROR(Core_Timing, "Integer overflow, use max value");
20 return std::numeric_limits<s64>::max();
21 }
22 if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
23 LOG_DEBUG(Core_Timing, "Time very big, do rounding");
24 return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
25 }
26 return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
27}
28
29s64 usToCycles(std::chrono::microseconds us) {
30 if (static_cast<u64>(us.count() / 1000000) > MAX_VALUE_TO_MULTIPLY) {
31 LOG_ERROR(Core_Timing, "Integer overflow, use max value");
32 return std::numeric_limits<s64>::max();
33 }
34 if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
35 LOG_DEBUG(Core_Timing, "Time very big, do rounding");
36 return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
37 }
38 return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
39}
40
41s64 nsToCycles(std::chrono::nanoseconds ns) {
42 const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
43 return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
44}
45
46u64 msToClockCycles(std::chrono::milliseconds ns) {
47 const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
48 return Common::Divide128On32(temp, 1000).first;
49}
50
51u64 usToClockCycles(std::chrono::microseconds ns) {
52 const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
53 return Common::Divide128On32(temp, 1000000).first;
54}
55
56u64 nsToClockCycles(std::chrono::nanoseconds ns) {
57 const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
58 return Common::Divide128On32(temp, 1000000000).first;
59}
60
61u64 CpuCyclesToClockCycles(u64 ticks) {
62 const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
63 return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
64}
65
66std::chrono::milliseconds CyclesToMs(s64 cycles) {
67 const u128 temporal = Common::Multiply64Into128(cycles, 1000);
68 u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
69 return std::chrono::milliseconds(ms);
70}
71
72std::chrono::nanoseconds CyclesToNs(s64 cycles) {
73 const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
74 u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
75 return std::chrono::nanoseconds(ns);
76}
77
78std::chrono::microseconds CyclesToUs(s64 cycles) {
79 const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
80 u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
81 return std::chrono::microseconds(us);
82}
83
84} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index e4a046bf9..14c36a485 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -1,24 +1,59 @@
1// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2+ 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono> 7#include <chrono>
8
8#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hardware_properties.h"
9 11
10namespace Core::Timing { 12namespace Core::Timing {
11 13
12s64 msToCycles(std::chrono::milliseconds ms); 14namespace detail {
13s64 usToCycles(std::chrono::microseconds us); 15constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
14s64 nsToCycles(std::chrono::nanoseconds ns); 16constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
15u64 msToClockCycles(std::chrono::milliseconds ns); 17} // namespace detail
16u64 usToClockCycles(std::chrono::microseconds ns); 18
17u64 nsToClockCycles(std::chrono::nanoseconds ns); 19[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
18std::chrono::milliseconds CyclesToMs(s64 cycles); 20 return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
19std::chrono::nanoseconds CyclesToNs(s64 cycles); 21}
20std::chrono::microseconds CyclesToUs(s64 cycles); 22
21 23[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
22u64 CpuCyclesToClockCycles(u64 ticks); 24 return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000;
25}
26
27[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
28 return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000;
29}
30
31[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
32 return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
33}
34
35[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
36 return us.count() * detail::CNTFREQ_ADJUSTED / 1000;
37}
38
39[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
40 return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000;
41}
42
43[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
44 return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED;
45}
46
47[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
48 return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
49}
50
51[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
52 return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED);
53}
54
55[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
56 return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED);
57}
23 58
24} // namespace Core::Timing 59} // namespace Core::Timing
diff --git a/src/core/frontend/applets/controller.h b/src/core/frontend/applets/controller.h
index dff71d8d9..b0626a0f9 100644
--- a/src/core/frontend/applets/controller.h
+++ b/src/core/frontend/applets/controller.h
@@ -31,6 +31,7 @@ struct ControllerParameters {
31 bool allow_dual_joycons{}; 31 bool allow_dual_joycons{};
32 bool allow_left_joycon{}; 32 bool allow_left_joycon{};
33 bool allow_right_joycon{}; 33 bool allow_right_joycon{};
34 bool allow_gamecube_controller{};
34}; 35};
35 36
36class ControllerApplet { 37class ControllerApplet {
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index f8f005f15..0b6957e31 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -4,11 +4,11 @@
4 4
5#include "core/hle/kernel/client_port.h" 5#include "core/hle/kernel/client_port.h"
6#include "core/hle/kernel/client_session.h" 6#include "core/hle/kernel/client_session.h"
7#include "core/hle/kernel/errors.h"
8#include "core/hle/kernel/hle_ipc.h" 7#include "core/hle/kernel/hle_ipc.h"
9#include "core/hle/kernel/object.h" 8#include "core/hle/kernel/object.h"
10#include "core/hle/kernel/server_port.h" 9#include "core/hle/kernel/server_port.h"
11#include "core/hle/kernel/session.h" 10#include "core/hle/kernel/session.h"
11#include "core/hle/kernel/svc_results.h"
12 12
13namespace Kernel { 13namespace Kernel {
14 14
@@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {
21 21
22ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() { 22ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
23 if (active_sessions >= max_sessions) { 23 if (active_sessions >= max_sessions) {
24 return ERR_MAX_CONNECTIONS_REACHED; 24 return ResultMaxConnectionsReached;
25 } 25 }
26 active_sessions++; 26 active_sessions++;
27 27
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index a2be1a8f6..e230f365a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -3,11 +3,11 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/client_session.h" 5#include "core/hle/kernel/client_session.h"
6#include "core/hle/kernel/errors.h"
7#include "core/hle/kernel/hle_ipc.h" 6#include "core/hle/kernel/hle_ipc.h"
8#include "core/hle/kernel/k_thread.h" 7#include "core/hle/kernel/k_thread.h"
9#include "core/hle/kernel/server_session.h" 8#include "core/hle/kernel/server_session.h"
10#include "core/hle/kernel/session.h" 9#include "core/hle/kernel/session.h"
10#include "core/hle/kernel/svc_results.h"
11#include "core/hle/result.h" 11#include "core/hle/result.h"
12 12
13namespace Kernel { 13namespace Kernel {
@@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread,
43 Core::Timing::CoreTiming& core_timing) { 43 Core::Timing::CoreTiming& core_timing) {
44 // Keep ServerSession alive until we're done working with it. 44 // Keep ServerSession alive until we're done working with it.
45 if (!parent->Server()) { 45 if (!parent->Server()) {
46 return ERR_SESSION_CLOSED_BY_REMOTE; 46 return ResultSessionClosedByRemote;
47 } 47 }
48 48
49 // Signal the server session that new data is available 49 // Signal the server session that new data is available
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
deleted file mode 100644
index 7d32a39f0..000000000
--- a/src/core/hle/kernel/errors.h
+++ /dev/null
@@ -1,43 +0,0 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Kernel {
10
11// Confirmed Switch kernel error codes
12
13constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
16constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
17constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
18constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
19constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
20constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
21constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
22constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
23constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
24constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
25constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
26constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
27constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112};
28constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114};
29constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
30constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
31constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
32constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
33constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
34constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
35constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
36constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
37constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122};
38constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123};
39constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125};
40constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126};
41constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132};
42
43} // namespace Kernel
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 1a2fa9cd8..f96d34078 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -6,12 +6,12 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/hle/kernel/errors.h"
10#include "core/hle/kernel/handle_table.h" 9#include "core/hle/kernel/handle_table.h"
11#include "core/hle/kernel/k_scheduler.h" 10#include "core/hle/kernel/k_scheduler.h"
12#include "core/hle/kernel/k_thread.h" 11#include "core/hle/kernel/k_thread.h"
13#include "core/hle/kernel/kernel.h" 12#include "core/hle/kernel/kernel.h"
14#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
14#include "core/hle/kernel/svc_results.h"
15 15
16namespace Kernel { 16namespace Kernel {
17namespace { 17namespace {
@@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default;
33ResultCode HandleTable::SetSize(s32 handle_table_size) { 33ResultCode HandleTable::SetSize(s32 handle_table_size) {
34 if (static_cast<u32>(handle_table_size) > MAX_COUNT) { 34 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
35 LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT); 35 LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
36 return ERR_OUT_OF_MEMORY; 36 return ResultOutOfMemory;
37 } 37 }
38 38
39 // Values less than or equal to zero indicate to use the maximum allowable 39 // Values less than or equal to zero indicate to use the maximum allowable
@@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) {
53 const u16 slot = next_free_slot; 53 const u16 slot = next_free_slot;
54 if (slot >= table_size) { 54 if (slot >= table_size) {
55 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 55 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
56 return ERR_HANDLE_TABLE_FULL; 56 return ResultHandleTableFull;
57 } 57 }
58 next_free_slot = generations[slot]; 58 next_free_slot = generations[slot];
59 59
@@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
76 std::shared_ptr<Object> object = GetGeneric(handle); 76 std::shared_ptr<Object> object = GetGeneric(handle);
77 if (object == nullptr) { 77 if (object == nullptr) {
78 LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle); 78 LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle);
79 return ERR_INVALID_HANDLE; 79 return ResultInvalidHandle;
80 } 80 }
81 return Create(std::move(object)); 81 return Create(std::move(object));
82} 82}
@@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
84ResultCode HandleTable::Close(Handle handle) { 84ResultCode HandleTable::Close(Handle handle) {
85 if (!IsValid(handle)) { 85 if (!IsValid(handle)) {
86 LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle); 86 LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
87 return ERR_INVALID_HANDLE; 87 return ResultInvalidHandle;
88 } 88 }
89 89
90 const u16 slot = GetSlot(handle); 90 const u16 slot = GetSlot(handle);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7ec62cf18..161d9f782 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,7 +14,6 @@
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "core/hle/ipc_helpers.h" 16#include "core/hle/ipc_helpers.h"
17#include "core/hle/kernel/errors.h"
18#include "core/hle/kernel/handle_table.h" 17#include "core/hle/kernel/handle_table.h"
19#include "core/hle/kernel/hle_ipc.h" 18#include "core/hle/kernel/hle_ipc.h"
20#include "core/hle/kernel/k_readable_event.h" 19#include "core/hle/kernel/k_readable_event.h"
@@ -26,6 +25,7 @@
26#include "core/hle/kernel/object.h" 25#include "core/hle/kernel/object.h"
27#include "core/hle/kernel/process.h" 26#include "core/hle/kernel/process.h"
28#include "core/hle/kernel/server_session.h" 27#include "core/hle/kernel/server_session.h"
28#include "core/hle/kernel/svc_results.h"
29#include "core/hle/kernel/time_manager.h" 29#include "core/hle/kernel/time_manager.h"
30#include "core/memory.h" 30#include "core/memory.h"
31 31
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index d0e90fd60..7018f56da 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
120 s32 user_value{}; 120 s32 user_value{};
121 if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) { 121 if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) {
122 LOG_ERROR(Kernel, "Invalid current memory!"); 122 LOG_ERROR(Kernel, "Invalid current memory!");
123 return Svc::ResultInvalidCurrentMemory; 123 return ResultInvalidCurrentMemory;
124 } 124 }
125 if (user_value != value) { 125 if (user_value != value) {
126 return Svc::ResultInvalidState; 126 return ResultInvalidState;
127 } 127 }
128 128
129 auto it = thread_tree.nfind_light({addr, -1}); 129 auto it = thread_tree.nfind_light({addr, -1});
@@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
189 189
190 if (!succeeded) { 190 if (!succeeded) {
191 LOG_ERROR(Kernel, "Invalid current memory!"); 191 LOG_ERROR(Kernel, "Invalid current memory!");
192 return Svc::ResultInvalidCurrentMemory; 192 return ResultInvalidCurrentMemory;
193 } 193 }
194 if (user_value != value) { 194 if (user_value != value) {
195 return Svc::ResultInvalidState; 195 return ResultInvalidState;
196 } 196 }
197 197
198 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && 198 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
@@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
221 // Check that the thread isn't terminating. 221 // Check that the thread isn't terminating.
222 if (cur_thread->IsTerminationRequested()) { 222 if (cur_thread->IsTerminationRequested()) {
223 slp.CancelSleep(); 223 slp.CancelSleep();
224 return Svc::ResultTerminationRequested; 224 return ResultTerminationRequested;
225 } 225 }
226 226
227 // Set the synced object. 227 // Set the synced object.
228 cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); 228 cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
229 229
230 // Read the value from userspace. 230 // Read the value from userspace.
231 s32 user_value{}; 231 s32 user_value{};
@@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
238 238
239 if (!succeeded) { 239 if (!succeeded) {
240 slp.CancelSleep(); 240 slp.CancelSleep();
241 return Svc::ResultInvalidCurrentMemory; 241 return ResultInvalidCurrentMemory;
242 } 242 }
243 243
244 // Check that the value is less than the specified one. 244 // Check that the value is less than the specified one.
245 if (user_value >= value) { 245 if (user_value >= value) {
246 slp.CancelSleep(); 246 slp.CancelSleep();
247 return Svc::ResultInvalidState; 247 return ResultInvalidState;
248 } 248 }
249 249
250 // Check that the timeout is non-zero. 250 // Check that the timeout is non-zero.
251 if (timeout == 0) { 251 if (timeout == 0) {
252 slp.CancelSleep(); 252 slp.CancelSleep();
253 return Svc::ResultTimedOut; 253 return ResultTimedOut;
254 } 254 }
255 255
256 // Set the arbiter. 256 // Set the arbiter.
@@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
288 // Check that the thread isn't terminating. 288 // Check that the thread isn't terminating.
289 if (cur_thread->IsTerminationRequested()) { 289 if (cur_thread->IsTerminationRequested()) {
290 slp.CancelSleep(); 290 slp.CancelSleep();
291 return Svc::ResultTerminationRequested; 291 return ResultTerminationRequested;
292 } 292 }
293 293
294 // Set the synced object. 294 // Set the synced object.
295 cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); 295 cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
296 296
297 // Read the value from userspace. 297 // Read the value from userspace.
298 s32 user_value{}; 298 s32 user_value{};
299 if (!ReadFromUser(system, &user_value, addr)) { 299 if (!ReadFromUser(system, &user_value, addr)) {
300 slp.CancelSleep(); 300 slp.CancelSleep();
301 return Svc::ResultInvalidCurrentMemory; 301 return ResultInvalidCurrentMemory;
302 } 302 }
303 303
304 // Check that the value is equal. 304 // Check that the value is equal.
305 if (value != user_value) { 305 if (value != user_value) {
306 slp.CancelSleep(); 306 slp.CancelSleep();
307 return Svc::ResultInvalidState; 307 return ResultInvalidState;
308 } 308 }
309 309
310 // Check that the timeout is non-zero. 310 // Check that the timeout is non-zero.
311 if (timeout == 0) { 311 if (timeout == 0) {
312 slp.CancelSleep(); 312 slp.CancelSleep();
313 return Svc::ResultTimedOut; 313 return ResultTimedOut;
314 } 314 }
315 315
316 // Set the arbiter. 316 // Set the arbiter.
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index f0ad8b390..170d8fa0d 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
92 // Write the value to userspace. 92 // Write the value to userspace.
93 if (!WriteToUser(system, addr, std::addressof(next_value))) { 93 if (!WriteToUser(system, addr, std::addressof(next_value))) {
94 if (next_owner_thread) { 94 if (next_owner_thread) {
95 next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory); 95 next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
96 } 96 }
97 97
98 return Svc::ResultInvalidCurrentMemory; 98 return ResultInvalidCurrentMemory;
99 } 99 }
100 } 100 }
101 101
@@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val
114 cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); 114 cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
115 115
116 // Check if the thread should terminate. 116 // Check if the thread should terminate.
117 R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested); 117 R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested);
118 118
119 { 119 {
120 // Read the tag from userspace. 120 // Read the tag from userspace.
121 u32 test_tag{}; 121 u32 test_tag{};
122 R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr), 122 R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
123 Svc::ResultInvalidCurrentMemory); 123 ResultInvalidCurrentMemory);
124 124
125 // If the tag isn't the handle (with wait mask), we're done. 125 // If the tag isn't the handle (with wait mask), we're done.
126 R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS); 126 R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);
127 127
128 // Get the lock owner thread. 128 // Get the lock owner thread.
129 owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle); 129 owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle);
130 R_UNLESS(owner_thread, Svc::ResultInvalidHandle); 130 R_UNLESS(owner_thread, ResultInvalidHandle);
131 131
132 // Update the lock. 132 // Update the lock.
133 cur_thread->SetAddressKey(addr, value); 133 cur_thread->SetAddressKey(addr, value);
@@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) {
191 thread_to_close = owner_thread.get(); 191 thread_to_close = owner_thread.get();
192 } else { 192 } else {
193 // The lock was tagged with a thread that doesn't exist. 193 // The lock was tagged with a thread that doesn't exist.
194 thread->SetSyncedObject(nullptr, Svc::ResultInvalidState); 194 thread->SetSyncedObject(nullptr, ResultInvalidState);
195 thread->Wakeup(); 195 thread->Wakeup();
196 } 196 }
197 } 197 }
198 } else { 198 } else {
199 // If the address wasn't accessible, note so. 199 // If the address wasn't accessible, note so.
200 thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory); 200 thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
201 thread->Wakeup(); 201 thread->Wakeup();
202 } 202 }
203 203
@@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
263 KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout}; 263 KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout};
264 264
265 // Set the synced object. 265 // Set the synced object.
266 cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); 266 cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
267 267
268 // Check that the thread isn't terminating. 268 // Check that the thread isn't terminating.
269 if (cur_thread->IsTerminationRequested()) { 269 if (cur_thread->IsTerminationRequested()) {
270 slp.CancelSleep(); 270 slp.CancelSleep();
271 return Svc::ResultTerminationRequested; 271 return ResultTerminationRequested;
272 } 272 }
273 273
274 // Update the value and process for the next owner. 274 // Update the value and process for the next owner.
@@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
302 // Write the value to userspace. 302 // Write the value to userspace.
303 if (!WriteToUser(system, addr, std::addressof(next_value))) { 303 if (!WriteToUser(system, addr, std::addressof(next_value))) {
304 slp.CancelSleep(); 304 slp.CancelSleep();
305 return Svc::ResultInvalidCurrentMemory; 305 return ResultInvalidCurrentMemory;
306 } 306 }
307 } 307 }
308 308
diff --git a/src/core/hle/kernel/k_readable_event.cpp b/src/core/hle/kernel/k_readable_event.cpp
index d8a42dbaf..4b4d34857 100644
--- a/src/core/hle/kernel/k_readable_event.cpp
+++ b/src/core/hle/kernel/k_readable_event.cpp
@@ -6,7 +6,6 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/common_funcs.h" 7#include "common/common_funcs.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/hle/kernel/errors.h"
10#include "core/hle/kernel/k_readable_event.h" 9#include "core/hle/kernel/k_readable_event.h"
11#include "core/hle/kernel/k_scheduler.h" 10#include "core/hle/kernel/k_scheduler.h"
12#include "core/hle/kernel/k_thread.h" 11#include "core/hle/kernel/k_thread.h"
@@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() {
47 KScopedSchedulerLock lk{kernel}; 46 KScopedSchedulerLock lk{kernel};
48 47
49 if (!is_signaled) { 48 if (!is_signaled) {
50 return Svc::ResultInvalidState; 49 return ResultInvalidState;
51 } 50 }
52 51
53 is_signaled = false; 52 is_signaled = false;
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index ab2ab683f..d7a4a38e6 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const {
75ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) { 75ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
76 const auto index = static_cast<std::size_t>(which); 76 const auto index = static_cast<std::size_t>(which);
77 KScopedLightLock lk(lock); 77 KScopedLightLock lk(lock);
78 R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState); 78 R_UNLESS(current_values[index] <= value, ResultInvalidState);
79 79
80 limit_values[index] = value; 80 limit_values[index] = value;
81 81
diff --git a/src/core/hle/kernel/k_scoped_resource_reservation.h b/src/core/hle/kernel/k_scoped_resource_reservation.h
new file mode 100644
index 000000000..c5deca00b
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_resource_reservation.h
@@ -0,0 +1,67 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file references various implementation details from Atmosphere, an open-source firmware for
6// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
7
8#pragma once
9
10#include "common/common_types.h"
11#include "core/hle/kernel/k_resource_limit.h"
12#include "core/hle/kernel/process.h"
13
14namespace Kernel {
15
16class KScopedResourceReservation {
17public:
18 explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
19 s64 v, s64 timeout)
20 : resource_limit(std::move(l)), value(v), resource(r) {
21 if (resource_limit && value) {
22 success = resource_limit->Reserve(resource, value, timeout);
23 } else {
24 success = true;
25 }
26 }
27
28 explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
29 s64 v = 1)
30 : resource_limit(std::move(l)), value(v), resource(r) {
31 if (resource_limit && value) {
32 success = resource_limit->Reserve(resource, value);
33 } else {
34 success = true;
35 }
36 }
37
38 explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t)
39 : KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {}
40
41 explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1)
42 : KScopedResourceReservation(p->GetResourceLimit(), r, v) {}
43
44 ~KScopedResourceReservation() noexcept {
45 if (resource_limit && value && success) {
46 // resource was not committed, release the reservation.
47 resource_limit->Release(resource, value);
48 }
49 }
50
51 /// Commit the resource reservation, destruction of this object does not release the resource
52 void Commit() {
53 resource_limit = nullptr;
54 }
55
56 [[nodiscard]] bool Succeeded() const {
57 return success;
58 }
59
60private:
61 std::shared_ptr<KResourceLimit> resource_limit;
62 s64 value;
63 LimitableResource resource;
64 bool success;
65};
66
67} // namespace Kernel
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index 140cc46a7..82f72a0fe 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
40 // Check if the timeout is zero. 40 // Check if the timeout is zero.
41 if (timeout == 0) { 41 if (timeout == 0) {
42 slp.CancelSleep(); 42 slp.CancelSleep();
43 return Svc::ResultTimedOut; 43 return ResultTimedOut;
44 } 44 }
45 45
46 // Check if the thread should terminate. 46 // Check if the thread should terminate.
47 if (thread->IsTerminationRequested()) { 47 if (thread->IsTerminationRequested()) {
48 slp.CancelSleep(); 48 slp.CancelSleep();
49 return Svc::ResultTerminationRequested; 49 return ResultTerminationRequested;
50 } 50 }
51 51
52 // Check if waiting was canceled. 52 // Check if waiting was canceled.
53 if (thread->IsWaitCancelled()) { 53 if (thread->IsWaitCancelled()) {
54 slp.CancelSleep(); 54 slp.CancelSleep();
55 thread->ClearWaitCancelled(); 55 thread->ClearWaitCancelled();
56 return Svc::ResultCancelled; 56 return ResultCancelled;
57 } 57 }
58 58
59 // Add the waiters. 59 // Add the waiters.
@@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
75 75
76 // Mark the thread as waiting. 76 // Mark the thread as waiting.
77 thread->SetCancellable(); 77 thread->SetCancellable();
78 thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); 78 thread->SetSyncedObject(nullptr, ResultTimedOut);
79 thread->SetState(ThreadState::Waiting); 79 thread->SetState(ThreadState::Waiting);
80 thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization); 80 thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
81 } 81 }
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index b59259c4f..e5620da5a 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -18,7 +18,6 @@
18#include "core/core.h" 18#include "core/core.h"
19#include "core/cpu_manager.h" 19#include "core/cpu_manager.h"
20#include "core/hardware_properties.h" 20#include "core/hardware_properties.h"
21#include "core/hle/kernel/errors.h"
22#include "core/hle/kernel/handle_table.h" 21#include "core/hle/kernel/handle_table.h"
23#include "core/hle/kernel/k_condition_variable.h" 22#include "core/hle/kernel/k_condition_variable.h"
24#include "core/hle/kernel/k_resource_limit.h" 23#include "core/hle/kernel/k_resource_limit.h"
@@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
127 126
128 // Set core ID and wait result. 127 // Set core ID and wait result.
129 core_id = phys_core; 128 core_id = phys_core;
130 wait_result = Svc::ResultNoSynchronizationObject; 129 wait_result = ResultNoSynchronizationObject;
131 130
132 // Set priorities. 131 // Set priorities.
133 priority = prio; 132 priority = prio;
@@ -238,7 +237,7 @@ void KThread::Finalize() {
238 while (it != waiter_list.end()) { 237 while (it != waiter_list.end()) {
239 // The thread shouldn't be a kernel waiter. 238 // The thread shouldn't be a kernel waiter.
240 it->SetLockOwner(nullptr); 239 it->SetLockOwner(nullptr);
241 it->SetSyncedObject(nullptr, Svc::ResultInvalidState); 240 it->SetSyncedObject(nullptr, ResultInvalidState);
242 it->Wakeup(); 241 it->Wakeup();
243 it = waiter_list.erase(it); 242 it = waiter_list.erase(it);
244 } 243 }
@@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
447 // If the core id is no-update magic, preserve the ideal core id. 446 // If the core id is no-update magic, preserve the ideal core id.
448 if (core_id == Svc::IdealCoreNoUpdate) { 447 if (core_id == Svc::IdealCoreNoUpdate) {
449 core_id = virtual_ideal_core_id; 448 core_id = virtual_ideal_core_id;
450 R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination); 449 R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination);
451 } 450 }
452 451
453 // Set the virtual core/affinity mask. 452 // Set the virtual core/affinity mask.
@@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
526 if (GetStackParameters().is_pinned) { 525 if (GetStackParameters().is_pinned) {
527 // Verify that the current thread isn't terminating. 526 // Verify that the current thread isn't terminating.
528 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), 527 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
529 Svc::ResultTerminationRequested); 528 ResultTerminationRequested);
530 529
531 // Note that the thread was pinned. 530 // Note that the thread was pinned.
532 thread_is_pinned = true; 531 thread_is_pinned = true;
@@ -604,7 +603,7 @@ void KThread::WaitCancel() {
604 sleeping_queue->WakeupThread(this); 603 sleeping_queue->WakeupThread(this);
605 wait_cancelled = true; 604 wait_cancelled = true;
606 } else { 605 } else {
607 SetSyncedObject(nullptr, Svc::ResultCancelled); 606 SetSyncedObject(nullptr, ResultCancelled);
608 SetState(ThreadState::Runnable); 607 SetState(ThreadState::Runnable);
609 wait_cancelled = false; 608 wait_cancelled = false;
610 } 609 }
@@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
663 // Verify our state. 662 // Verify our state.
664 const auto cur_state = GetState(); 663 const auto cur_state = GetState();
665 R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable), 664 R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable),
666 Svc::ResultInvalidState); 665 ResultInvalidState);
667 666
668 // Either pause or resume. 667 // Either pause or resume.
669 if (activity == Svc::ThreadActivity::Paused) { 668 if (activity == Svc::ThreadActivity::Paused) {
670 // Verify that we're not suspended. 669 // Verify that we're not suspended.
671 R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState); 670 R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
672 671
673 // Suspend. 672 // Suspend.
674 RequestSuspend(SuspendType::Thread); 673 RequestSuspend(SuspendType::Thread);
@@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
676 ASSERT(activity == Svc::ThreadActivity::Runnable); 675 ASSERT(activity == Svc::ThreadActivity::Runnable);
677 676
678 // Verify that we're suspended. 677 // Verify that we're suspended.
679 R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState); 678 R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
680 679
681 // Resume. 680 // Resume.
682 Resume(SuspendType::Thread); 681 Resume(SuspendType::Thread);
@@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
698 if (GetStackParameters().is_pinned) { 697 if (GetStackParameters().is_pinned) {
699 // Verify that the current thread isn't terminating. 698 // Verify that the current thread isn't terminating.
700 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), 699 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
701 Svc::ResultTerminationRequested); 700 ResultTerminationRequested);
702 701
703 // Note that the thread was pinned and not current. 702 // Note that the thread was pinned and not current.
704 thread_is_pinned = true; 703 thread_is_pinned = true;
@@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) {
745 KScopedSchedulerLock sl{kernel}; 744 KScopedSchedulerLock sl{kernel};
746 745
747 // Verify that we're suspended. 746 // Verify that we're suspended.
748 R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState); 747 R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
749 748
750 // If we're not terminating, get the thread's user context. 749 // If we're not terminating, get the thread's user context.
751 if (!IsTerminationRequested()) { 750 if (!IsTerminationRequested()) {
@@ -905,12 +904,11 @@ ResultCode KThread::Run() {
905 KScopedSchedulerLock lk{kernel}; 904 KScopedSchedulerLock lk{kernel};
906 905
907 // If either this thread or the current thread are requesting termination, note it. 906 // If either this thread or the current thread are requesting termination, note it.
908 R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested); 907 R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested);
909 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), 908 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested);
910 Svc::ResultTerminationRequested);
911 909
912 // Ensure our thread state is correct. 910 // Ensure our thread state is correct.
913 R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState); 911 R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState);
914 912
915 // If the current thread has been asked to suspend, suspend it and retry. 913 // If the current thread has been asked to suspend, suspend it and retry.
916 if (GetCurrentThread(kernel).IsSuspended()) { 914 if (GetCurrentThread(kernel).IsSuspended()) {
@@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) {
962 // Check if the thread should terminate. 960 // Check if the thread should terminate.
963 if (IsTerminationRequested()) { 961 if (IsTerminationRequested()) {
964 slp.CancelSleep(); 962 slp.CancelSleep();
965 return Svc::ResultTerminationRequested; 963 return ResultTerminationRequested;
966 } 964 }
967 965
968 // Mark the thread as waiting. 966 // Mark the thread as waiting.
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index b20c2d13a..b6e6f115e 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -26,7 +26,6 @@
26#include "core/device_memory.h" 26#include "core/device_memory.h"
27#include "core/hardware_properties.h" 27#include "core/hardware_properties.h"
28#include "core/hle/kernel/client_port.h" 28#include "core/hle/kernel/client_port.h"
29#include "core/hle/kernel/errors.h"
30#include "core/hle/kernel/handle_table.h" 29#include "core/hle/kernel/handle_table.h"
31#include "core/hle/kernel/k_resource_limit.h" 30#include "core/hle/kernel/k_resource_limit.h"
32#include "core/hle/kernel/k_scheduler.h" 31#include "core/hle/kernel/k_scheduler.h"
@@ -39,6 +38,7 @@
39#include "core/hle/kernel/process.h" 38#include "core/hle/kernel/process.h"
40#include "core/hle/kernel/service_thread.h" 39#include "core/hle/kernel/service_thread.h"
41#include "core/hle/kernel/shared_memory.h" 40#include "core/hle/kernel/shared_memory.h"
41#include "core/hle/kernel/svc_results.h"
42#include "core/hle/kernel/time_manager.h" 42#include "core/hle/kernel/time_manager.h"
43#include "core/hle/lock.h" 43#include "core/hle/lock.h"
44#include "core/hle/result.h" 44#include "core/hle/result.h"
@@ -141,11 +141,17 @@ struct KernelCore::Impl {
141 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess()); 141 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess());
142 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200) 142 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200)
143 .IsSuccess()); 143 .IsSuccess());
144 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess()); 144 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess());
145 145
146 if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) { 146 // Derived from recent software updates. The kernel reserves 27MB
147 constexpr u64 kernel_size{0x1b00000};
148 if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) {
147 UNREACHABLE(); 149 UNREACHABLE();
148 } 150 }
151 // Reserve secure applet memory, introduced in firmware 5.0.0
152 constexpr u64 secure_applet_memory_size{0x400000};
153 ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
154 secure_applet_memory_size));
149 } 155 }
150 156
151 void InitializePreemption(KernelCore& kernel) { 157 void InitializePreemption(KernelCore& kernel) {
@@ -302,8 +308,11 @@ struct KernelCore::Impl {
302 // Allocate slab heaps 308 // Allocate slab heaps
303 user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>(); 309 user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>();
304 310
311 constexpr u64 user_slab_heap_size{0x1ef000};
312 // Reserve slab heaps
313 ASSERT(
314 system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
305 // Initialize slab heaps 315 // Initialize slab heaps
306 constexpr u64 user_slab_heap_size{0x3de000};
307 user_slab_heap_pages->Initialize( 316 user_slab_heap_pages->Initialize(
308 system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase), 317 system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
309 user_slab_heap_size); 318 user_slab_heap_size);
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index acf13585c..77f135cdc 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -8,9 +8,9 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/scope_exit.h" 10#include "common/scope_exit.h"
11#include "core/hle/kernel/errors.h"
12#include "core/hle/kernel/memory/memory_manager.h" 11#include "core/hle/kernel/memory/memory_manager.h"
13#include "core/hle/kernel/memory/page_linked_list.h" 12#include "core/hle/kernel/memory/page_linked_list.h"
13#include "core/hle/kernel/svc_results.h"
14 14
15namespace Kernel::Memory { 15namespace Kernel::Memory {
16 16
@@ -95,7 +95,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
95 // Choose a heap based on our page size request 95 // Choose a heap based on our page size request
96 const s32 heap_index{PageHeap::GetBlockIndex(num_pages)}; 96 const s32 heap_index{PageHeap::GetBlockIndex(num_pages)};
97 if (heap_index < 0) { 97 if (heap_index < 0) {
98 return ERR_OUT_OF_MEMORY; 98 return ResultOutOfMemory;
99 } 99 }
100 100
101 // TODO (bunnei): Support multiple managers 101 // TODO (bunnei): Support multiple managers
@@ -140,7 +140,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
140 140
141 // Only succeed if we allocated as many pages as we wanted 141 // Only succeed if we allocated as many pages as we wanted
142 if (num_pages) { 142 if (num_pages) {
143 return ERR_OUT_OF_MEMORY; 143 return ResultOutOfMemory;
144 } 144 }
145 145
146 // We succeeded! 146 // We succeeded!
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp
index 7de91c768..00ed9b881 100644
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -6,8 +6,7 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/scope_exit.h" 7#include "common/scope_exit.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/hle/kernel/errors.h" 9#include "core/hle/kernel/k_scoped_resource_reservation.h"
10#include "core/hle/kernel/k_resource_limit.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/memory/address_space_info.h" 11#include "core/hle/kernel/memory/address_space_info.h"
13#include "core/hle/kernel/memory/memory_block.h" 12#include "core/hle/kernel/memory/memory_block.h"
@@ -16,6 +15,7 @@
16#include "core/hle/kernel/memory/page_table.h" 15#include "core/hle/kernel/memory/page_table.h"
17#include "core/hle/kernel/memory/system_control.h" 16#include "core/hle/kernel/memory/system_control.h"
18#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/svc_results.h"
19#include "core/memory.h" 19#include "core/memory.h"
20 20
21namespace Kernel::Memory { 21namespace Kernel::Memory {
@@ -141,7 +141,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
141 (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)}; 141 (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)};
142 if (alloc_size < needed_size) { 142 if (alloc_size < needed_size) {
143 UNREACHABLE(); 143 UNREACHABLE();
144 return ERR_OUT_OF_MEMORY; 144 return ResultOutOfMemory;
145 } 145 }
146 146
147 const std::size_t remaining_size{alloc_size - needed_size}; 147 const std::size_t remaining_size{alloc_size - needed_size};
@@ -277,11 +277,11 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt
277 const u64 size{num_pages * PageSize}; 277 const u64 size{num_pages * PageSize};
278 278
279 if (!CanContain(addr, size, state)) { 279 if (!CanContain(addr, size, state)) {
280 return ERR_INVALID_ADDRESS_STATE; 280 return ResultInvalidCurrentMemory;
281 } 281 }
282 282
283 if (IsRegionMapped(addr, size)) { 283 if (IsRegionMapped(addr, size)) {
284 return ERR_INVALID_ADDRESS_STATE; 284 return ResultInvalidCurrentMemory;
285 } 285 }
286 286
287 PageLinkedList page_linked_list; 287 PageLinkedList page_linked_list;
@@ -307,7 +307,7 @@ ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::
307 MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); 307 MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
308 308
309 if (IsRegionMapped(dst_addr, size)) { 309 if (IsRegionMapped(dst_addr, size)) {
310 return ERR_INVALID_ADDRESS_STATE; 310 return ResultInvalidCurrentMemory;
311 } 311 }
312 312
313 PageLinkedList page_linked_list; 313 PageLinkedList page_linked_list;
@@ -409,27 +409,25 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
409 return RESULT_SUCCESS; 409 return RESULT_SUCCESS;
410 } 410 }
411 411
412 auto process{system.Kernel().CurrentProcess()};
413 const std::size_t remaining_size{size - mapped_size}; 412 const std::size_t remaining_size{size - mapped_size};
414 const std::size_t remaining_pages{remaining_size / PageSize}; 413 const std::size_t remaining_pages{remaining_size / PageSize};
415 414
416 if (process->GetResourceLimit() && 415 // Reserve the memory from the process resource limit.
417 !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) { 416 KScopedResourceReservation memory_reservation(
418 return ERR_RESOURCE_LIMIT_EXCEEDED; 417 system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
418 remaining_size);
419 if (!memory_reservation.Succeeded()) {
420 LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size);
421 return ResultResourceLimitedExceeded;
419 } 422 }
420 423
421 PageLinkedList page_linked_list; 424 PageLinkedList page_linked_list;
422 {
423 auto block_guard = detail::ScopeExit([&] {
424 system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool);
425 process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size);
426 });
427 425
428 CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, 426 CASCADE_CODE(
429 memory_pool)); 427 system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool));
430 428
431 block_guard.Cancel(); 429 // We succeeded, so commit the memory reservation.
432 } 430 memory_reservation.Commit();
433 431
434 MapPhysicalMemory(page_linked_list, addr, end_addr); 432 MapPhysicalMemory(page_linked_list, addr, end_addr);
435 433
@@ -454,12 +452,12 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
454 block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { 452 block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
455 if (info.state == MemoryState::Normal) { 453 if (info.state == MemoryState::Normal) {
456 if (info.attribute != MemoryAttribute::None) { 454 if (info.attribute != MemoryAttribute::None) {
457 result = ERR_INVALID_ADDRESS_STATE; 455 result = ResultInvalidCurrentMemory;
458 return; 456 return;
459 } 457 }
460 mapped_size += GetSizeInRange(info, addr, end_addr); 458 mapped_size += GetSizeInRange(info, addr, end_addr);
461 } else if (info.state != MemoryState::Free) { 459 } else if (info.state != MemoryState::Free) {
462 result = ERR_INVALID_ADDRESS_STATE; 460 result = ResultInvalidCurrentMemory;
463 } 461 }
464 }); 462 });
465 463
@@ -526,7 +524,7 @@ ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
526 MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); 524 MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
527 525
528 if (IsRegionMapped(dst_addr, size)) { 526 if (IsRegionMapped(dst_addr, size)) {
529 return ERR_INVALID_ADDRESS_STATE; 527 return ResultInvalidCurrentMemory;
530 } 528 }
531 529
532 PageLinkedList page_linked_list; 530 PageLinkedList page_linked_list;
@@ -577,7 +575,7 @@ ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
577 AddRegionToPages(dst_addr, num_pages, dst_pages); 575 AddRegionToPages(dst_addr, num_pages, dst_pages);
578 576
579 if (!dst_pages.IsEqual(src_pages)) { 577 if (!dst_pages.IsEqual(src_pages)) {
580 return ERR_INVALID_MEMORY_RANGE; 578 return ResultInvalidMemoryRange;
581 } 579 }
582 580
583 { 581 {
@@ -626,11 +624,11 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem
626 const std::size_t size{num_pages * PageSize}; 624 const std::size_t size{num_pages * PageSize};
627 625
628 if (!CanContain(addr, size, state)) { 626 if (!CanContain(addr, size, state)) {
629 return ERR_INVALID_ADDRESS_STATE; 627 return ResultInvalidCurrentMemory;
630 } 628 }
631 629
632 if (IsRegionMapped(addr, num_pages * PageSize)) { 630 if (IsRegionMapped(addr, num_pages * PageSize)) {
633 return ERR_INVALID_ADDRESS_STATE; 631 return ResultInvalidCurrentMemory;
634 } 632 }
635 633
636 CASCADE_CODE(MapPages(addr, page_linked_list, perm)); 634 CASCADE_CODE(MapPages(addr, page_linked_list, perm));
@@ -768,7 +766,7 @@ ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
768ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { 766ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
769 767
770 if (size > heap_region_end - heap_region_start) { 768 if (size > heap_region_end - heap_region_start) {
771 return ERR_OUT_OF_MEMORY; 769 return ResultOutOfMemory;
772 } 770 }
773 771
774 const u64 previous_heap_size{GetHeapSize()}; 772 const u64 previous_heap_size{GetHeapSize()};
@@ -781,10 +779,14 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
781 779
782 const u64 delta{size - previous_heap_size}; 780 const u64 delta{size - previous_heap_size};
783 781
784 auto process{system.Kernel().CurrentProcess()}; 782 // Reserve memory for the heap extension.
785 if (process->GetResourceLimit() && delta != 0 && 783 KScopedResourceReservation memory_reservation(
786 !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) { 784 system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
787 return ERR_RESOURCE_LIMIT_EXCEEDED; 785 delta);
786
787 if (!memory_reservation.Succeeded()) {
788 LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
789 return ResultResourceLimitedExceeded;
788 } 790 }
789 791
790 PageLinkedList page_linked_list; 792 PageLinkedList page_linked_list;
@@ -794,12 +796,15 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
794 system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); 796 system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
795 797
796 if (IsRegionMapped(current_heap_addr, delta)) { 798 if (IsRegionMapped(current_heap_addr, delta)) {
797 return ERR_INVALID_ADDRESS_STATE; 799 return ResultInvalidCurrentMemory;
798 } 800 }
799 801
800 CASCADE_CODE( 802 CASCADE_CODE(
801 Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup)); 803 Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
802 804
805 // Succeeded in allocation, commit the resource reservation
806 memory_reservation.Commit();
807
803 block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal, 808 block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal,
804 MemoryPermission::ReadAndWrite); 809 MemoryPermission::ReadAndWrite);
805 810
@@ -816,17 +821,17 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
816 std::lock_guard lock{page_table_lock}; 821 std::lock_guard lock{page_table_lock};
817 822
818 if (!CanContain(region_start, region_num_pages * PageSize, state)) { 823 if (!CanContain(region_start, region_num_pages * PageSize, state)) {
819 return ERR_INVALID_ADDRESS_STATE; 824 return ResultInvalidCurrentMemory;
820 } 825 }
821 826
822 if (region_num_pages <= needed_num_pages) { 827 if (region_num_pages <= needed_num_pages) {
823 return ERR_OUT_OF_MEMORY; 828 return ResultOutOfMemory;
824 } 829 }
825 830
826 const VAddr addr{ 831 const VAddr addr{
827 AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)}; 832 AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)};
828 if (!addr) { 833 if (!addr) {
829 return ERR_OUT_OF_MEMORY; 834 return ResultOutOfMemory;
830 } 835 }
831 836
832 if (is_map_only) { 837 if (is_map_only) {
@@ -1105,13 +1110,13 @@ constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryS
1105 MemoryAttribute attr) const { 1110 MemoryAttribute attr) const {
1106 // Validate the states match expectation 1111 // Validate the states match expectation
1107 if ((info.state & state_mask) != state) { 1112 if ((info.state & state_mask) != state) {
1108 return ERR_INVALID_ADDRESS_STATE; 1113 return ResultInvalidCurrentMemory;
1109 } 1114 }
1110 if ((info.perm & perm_mask) != perm) { 1115 if ((info.perm & perm_mask) != perm) {
1111 return ERR_INVALID_ADDRESS_STATE; 1116 return ResultInvalidCurrentMemory;
1112 } 1117 }
1113 if ((info.attribute & attr_mask) != attr) { 1118 if ((info.attribute & attr_mask) != attr) {
1114 return ERR_INVALID_ADDRESS_STATE; 1119 return ResultInvalidCurrentMemory;
1115 } 1120 }
1116 1121
1117 return RESULT_SUCCESS; 1122 return RESULT_SUCCESS;
@@ -1138,14 +1143,14 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission*
1138 while (true) { 1143 while (true) {
1139 // Validate the current block 1144 // Validate the current block
1140 if (!(info.state == first_state)) { 1145 if (!(info.state == first_state)) {
1141 return ERR_INVALID_ADDRESS_STATE; 1146 return ResultInvalidCurrentMemory;
1142 } 1147 }
1143 if (!(info.perm == first_perm)) { 1148 if (!(info.perm == first_perm)) {
1144 return ERR_INVALID_ADDRESS_STATE; 1149 return ResultInvalidCurrentMemory;
1145 } 1150 }
1146 if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) == 1151 if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) ==
1147 (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) { 1152 (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) {
1148 return ERR_INVALID_ADDRESS_STATE; 1153 return ResultInvalidCurrentMemory;
1149 } 1154 }
1150 1155
1151 // Validate against the provided masks 1156 // Validate against the provided masks
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 2286b292d..47b3ac57b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -14,9 +14,9 @@
14#include "core/device_memory.h" 14#include "core/device_memory.h"
15#include "core/file_sys/program_metadata.h" 15#include "core/file_sys/program_metadata.h"
16#include "core/hle/kernel/code_set.h" 16#include "core/hle/kernel/code_set.h"
17#include "core/hle/kernel/errors.h"
18#include "core/hle/kernel/k_resource_limit.h" 17#include "core/hle/kernel/k_resource_limit.h"
19#include "core/hle/kernel/k_scheduler.h" 18#include "core/hle/kernel/k_scheduler.h"
19#include "core/hle/kernel/k_scoped_resource_reservation.h"
20#include "core/hle/kernel/k_thread.h" 20#include "core/hle/kernel/k_thread.h"
21#include "core/hle/kernel/kernel.h" 21#include "core/hle/kernel/kernel.h"
22#include "core/hle/kernel/memory/memory_block_manager.h" 22#include "core/hle/kernel/memory/memory_block_manager.h"
@@ -39,6 +39,7 @@ namespace {
39 */ 39 */
40void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) { 40void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
41 const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); 41 const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
42 ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
42 auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0, 43 auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0,
43 owner_process.GetIdealCoreId(), stack_top, &owner_process); 44 owner_process.GetIdealCoreId(), stack_top, &owner_process);
44 45
@@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
117 118
118 std::shared_ptr<Process> process = std::make_shared<Process>(system); 119 std::shared_ptr<Process> process = std::make_shared<Process>(system);
119 process->name = std::move(name); 120 process->name = std::move(name);
121
122 // TODO: This is inaccurate
123 // The process should hold a reference to the kernel-wide resource limit.
120 process->resource_limit = std::make_shared<KResourceLimit>(kernel, system); 124 process->resource_limit = std::make_shared<KResourceLimit>(kernel, system);
121 process->status = ProcessStatus::Created; 125 process->status = ProcessStatus::Created;
122 process->program_id = 0; 126 process->program_id = 0;
@@ -155,6 +159,9 @@ void Process::DecrementThreadCount() {
155} 159}
156 160
157u64 Process::GetTotalPhysicalMemoryAvailable() const { 161u64 Process::GetTotalPhysicalMemoryAvailable() const {
162 // TODO: This is expected to always return the application memory pool size after accurately
163 // reserving kernel resources. The current workaround uses a process-local resource limit of
164 // application memory pool size, which is inaccurate.
158 const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) + 165 const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
159 page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size + 166 page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
160 main_thread_stack_size}; 167 main_thread_stack_size};
@@ -248,8 +255,8 @@ ResultCode Process::Reset() {
248 KScopedSchedulerLock sl{kernel}; 255 KScopedSchedulerLock sl{kernel};
249 256
250 // Validate that we're in a state that we can reset. 257 // Validate that we're in a state that we can reset.
251 R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState); 258 R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState);
252 R_UNLESS(is_signaled, Svc::ResultInvalidState); 259 R_UNLESS(is_signaled, ResultInvalidState);
253 260
254 // Clear signaled. 261 // Clear signaled.
255 is_signaled = false; 262 is_signaled = false;
@@ -264,6 +271,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
264 system_resource_size = metadata.GetSystemResourceSize(); 271 system_resource_size = metadata.GetSystemResourceSize();
265 image_size = code_size; 272 image_size = code_size;
266 273
274 // Set initial resource limits
275 resource_limit->SetLimitValue(
276 LimitableResource::PhysicalMemory,
277 kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
278 KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
279 code_size + system_resource_size);
280 if (!memory_reservation.Succeeded()) {
281 LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes",
282 code_size + system_resource_size);
283 return ResultResourceLimitedExceeded;
284 }
267 // Initialize proces address space 285 // Initialize proces address space
268 if (const ResultCode result{ 286 if (const ResultCode result{
269 page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000, 287 page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000,
@@ -305,24 +323,22 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
305 UNREACHABLE(); 323 UNREACHABLE();
306 } 324 }
307 325
308 // Set initial resource limits
309 resource_limit->SetLimitValue(
310 LimitableResource::PhysicalMemory,
311 kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
312 resource_limit->SetLimitValue(LimitableResource::Threads, 608); 326 resource_limit->SetLimitValue(LimitableResource::Threads, 608);
313 resource_limit->SetLimitValue(LimitableResource::Events, 700); 327 resource_limit->SetLimitValue(LimitableResource::Events, 700);
314 resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128); 328 resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128);
315 resource_limit->SetLimitValue(LimitableResource::Sessions, 894); 329 resource_limit->SetLimitValue(LimitableResource::Sessions, 894);
316 ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size));
317 330
318 // Create TLS region 331 // Create TLS region
319 tls_region_address = CreateTLSRegion(); 332 tls_region_address = CreateTLSRegion();
333 memory_reservation.Commit();
320 334
321 return handle_table.SetSize(capabilities.GetHandleTableSize()); 335 return handle_table.SetSize(capabilities.GetHandleTableSize());
322} 336}
323 337
324void Process::Run(s32 main_thread_priority, u64 stack_size) { 338void Process::Run(s32 main_thread_priority, u64 stack_size) {
325 AllocateMainThreadStack(stack_size); 339 AllocateMainThreadStack(stack_size);
340 resource_limit->Reserve(LimitableResource::Threads, 1);
341 resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
326 342
327 const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size}; 343 const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
328 ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError()); 344 ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
@@ -330,8 +346,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
330 ChangeStatus(ProcessStatus::Running); 346 ChangeStatus(ProcessStatus::Running);
331 347
332 SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top); 348 SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
333 resource_limit->Reserve(LimitableResource::Threads, 1);
334 resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
335} 349}
336 350
337void Process::PrepareForTermination() { 351void Process::PrepareForTermination() {
@@ -358,6 +372,11 @@ void Process::PrepareForTermination() {
358 FreeTLSRegion(tls_region_address); 372 FreeTLSRegion(tls_region_address);
359 tls_region_address = 0; 373 tls_region_address = 0;
360 374
375 if (resource_limit) {
376 resource_limit->Release(LimitableResource::PhysicalMemory,
377 main_thread_stack_size + image_size);
378 }
379
361 ChangeStatus(ProcessStatus::Exited); 380 ChangeStatus(ProcessStatus::Exited);
362} 381}
363 382
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 0566311b6..7c567049e 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -6,10 +6,10 @@
6 6
7#include "common/bit_util.h" 7#include "common/bit_util.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/hle/kernel/errors.h"
10#include "core/hle/kernel/handle_table.h" 9#include "core/hle/kernel/handle_table.h"
11#include "core/hle/kernel/memory/page_table.h" 10#include "core/hle/kernel/memory/page_table.h"
12#include "core/hle/kernel/process_capability.h" 11#include "core/hle/kernel/process_capability.h"
12#include "core/hle/kernel/svc_results.h"
13 13
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
@@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
123 // If there's only one, then there's a problem. 123 // If there's only one, then there's a problem.
124 if (i >= num_capabilities) { 124 if (i >= num_capabilities) {
125 LOG_ERROR(Kernel, "Invalid combination! i={}", i); 125 LOG_ERROR(Kernel, "Invalid combination! i={}", i);
126 return ERR_INVALID_COMBINATION; 126 return ResultInvalidCombination;
127 } 127 }
128 128
129 const auto size_flags = capabilities[i]; 129 const auto size_flags = capabilities[i];
130 if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) { 130 if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) {
131 LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags); 131 LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags);
132 return ERR_INVALID_COMBINATION; 132 return ResultInvalidCombination;
133 } 133 }
134 134
135 const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table); 135 const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table);
@@ -159,7 +159,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
159 const auto type = GetCapabilityType(flag); 159 const auto type = GetCapabilityType(flag);
160 160
161 if (type == CapabilityType::Unset) { 161 if (type == CapabilityType::Unset) {
162 return ERR_INVALID_CAPABILITY_DESCRIPTOR; 162 return ResultInvalidCapabilityDescriptor;
163 } 163 }
164 164
165 // Bail early on ignorable entries, as one would expect, 165 // Bail early on ignorable entries, as one would expect,
@@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
176 LOG_ERROR(Kernel, 176 LOG_ERROR(Kernel,
177 "Attempted to initialize flags that may only be initialized once. set_flags={}", 177 "Attempted to initialize flags that may only be initialized once. set_flags={}",
178 set_flags); 178 set_flags);
179 return ERR_INVALID_COMBINATION; 179 return ResultInvalidCombination;
180 } 180 }
181 set_flags |= set_flag; 181 set_flags |= set_flag;
182 182
@@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
202 } 202 }
203 203
204 LOG_ERROR(Kernel, "Invalid capability type! type={}", type); 204 LOG_ERROR(Kernel, "Invalid capability type! type={}", type);
205 return ERR_INVALID_CAPABILITY_DESCRIPTOR; 205 return ResultInvalidCapabilityDescriptor;
206} 206}
207 207
208void ProcessCapabilities::Clear() { 208void ProcessCapabilities::Clear() {
@@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
225 if (priority_mask != 0 || core_mask != 0) { 225 if (priority_mask != 0 || core_mask != 0) {
226 LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}", 226 LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}",
227 priority_mask, core_mask); 227 priority_mask, core_mask);
228 return ERR_INVALID_CAPABILITY_DESCRIPTOR; 228 return ResultInvalidCapabilityDescriptor;
229 } 229 }
230 230
231 const u32 core_num_min = (flags >> 16) & 0xFF; 231 const u32 core_num_min = (flags >> 16) & 0xFF;
@@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
233 if (core_num_min > core_num_max) { 233 if (core_num_min > core_num_max) {
234 LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}", 234 LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}",
235 core_num_min, core_num_max); 235 core_num_min, core_num_max);
236 return ERR_INVALID_COMBINATION; 236 return ResultInvalidCombination;
237 } 237 }
238 238
239 const u32 priority_min = (flags >> 10) & 0x3F; 239 const u32 priority_min = (flags >> 10) & 0x3F;
@@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
242 LOG_ERROR(Kernel, 242 LOG_ERROR(Kernel,
243 "Priority min is greater than priority max! priority_min={}, priority_max={}", 243 "Priority min is greater than priority max! priority_min={}, priority_max={}",
244 core_num_min, priority_max); 244 core_num_min, priority_max);
245 return ERR_INVALID_COMBINATION; 245 return ResultInvalidCombination;
246 } 246 }
247 247
248 // The switch only has 4 usable cores. 248 // The switch only has 4 usable cores.
249 if (core_num_max >= 4) { 249 if (core_num_max >= 4) {
250 LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max); 250 LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max);
251 return ERR_INVALID_PROCESSOR_ID; 251 return ResultInvalidCoreId;
252 } 252 }
253 253
254 const auto make_mask = [](u64 min, u64 max) { 254 const auto make_mask = [](u64 min, u64 max) {
@@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
269 269
270 // If we've already set this svc before, bail. 270 // If we've already set this svc before, bail.
271 if ((set_svc_bits & svc_bit) != 0) { 271 if ((set_svc_bits & svc_bit) != 0) {
272 return ERR_INVALID_COMBINATION; 272 return ResultInvalidCombination;
273 } 273 }
274 set_svc_bits |= svc_bit; 274 set_svc_bits |= svc_bit;
275 275
@@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
283 283
284 if (svc_number >= svc_capabilities.size()) { 284 if (svc_number >= svc_capabilities.size()) {
285 LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number); 285 LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number);
286 return ERR_OUT_OF_RANGE; 286 return ResultOutOfRange;
287 } 287 }
288 288
289 svc_capabilities[svc_number] = true; 289 svc_capabilities[svc_number] = true;
@@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
321 if (interrupt >= interrupt_capabilities.size()) { 321 if (interrupt >= interrupt_capabilities.size()) {
322 LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}", 322 LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}",
323 interrupt); 323 interrupt);
324 return ERR_OUT_OF_RANGE; 324 return ResultOutOfRange;
325 } 325 }
326 326
327 interrupt_capabilities[interrupt] = true; 327 interrupt_capabilities[interrupt] = true;
@@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) {
334 const u32 reserved = flags >> 17; 334 const u32 reserved = flags >> 17;
335 if (reserved != 0) { 335 if (reserved != 0) {
336 LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); 336 LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
337 return ERR_RESERVED_VALUE; 337 return ResultReservedValue;
338 } 338 }
339 339
340 program_type = static_cast<ProgramType>((flags >> 14) & 0b111); 340 program_type = static_cast<ProgramType>((flags >> 14) & 0b111);
@@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
354 LOG_ERROR(Kernel, 354 LOG_ERROR(Kernel,
355 "Kernel version is non zero or flags are too small! major_version={}, flags={}", 355 "Kernel version is non zero or flags are too small! major_version={}, flags={}",
356 major_version, flags); 356 major_version, flags);
357 return ERR_INVALID_CAPABILITY_DESCRIPTOR; 357 return ResultInvalidCapabilityDescriptor;
358 } 358 }
359 359
360 kernel_version = flags; 360 kernel_version = flags;
@@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
365 const u32 reserved = flags >> 26; 365 const u32 reserved = flags >> 26;
366 if (reserved != 0) { 366 if (reserved != 0) {
367 LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); 367 LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
368 return ERR_RESERVED_VALUE; 368 return ResultReservedValue;
369 } 369 }
370 370
371 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF); 371 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
@@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) {
376 const u32 reserved = flags >> 19; 376 const u32 reserved = flags >> 19;
377 if (reserved != 0) { 377 if (reserved != 0) {
378 LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); 378 LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
379 return ERR_RESERVED_VALUE; 379 return ResultReservedValue;
380 } 380 }
381 381
382 is_debuggable = (flags & 0x20000) != 0; 382 is_debuggable = (flags & 0x20000) != 0;
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index fe7a483c4..5d17346ad 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -5,11 +5,11 @@
5#include <tuple> 5#include <tuple>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "core/hle/kernel/client_port.h" 7#include "core/hle/kernel/client_port.h"
8#include "core/hle/kernel/errors.h"
9#include "core/hle/kernel/k_thread.h" 8#include "core/hle/kernel/k_thread.h"
10#include "core/hle/kernel/object.h" 9#include "core/hle/kernel/object.h"
11#include "core/hle/kernel/server_port.h" 10#include "core/hle/kernel/server_port.h"
12#include "core/hle/kernel/server_session.h" 11#include "core/hle/kernel/server_session.h"
12#include "core/hle/kernel/svc_results.h"
13 13
14namespace Kernel { 14namespace Kernel {
15 15
@@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default;
18 18
19ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { 19ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
20 if (pending_sessions.empty()) { 20 if (pending_sessions.empty()) {
21 return ERR_NOT_FOUND; 21 return ResultNotFound;
22 } 22 }
23 23
24 auto session = std::move(pending_sessions.back()); 24 auto session = std::move(pending_sessions.back());
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index 75304b961..8830d4e91 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -4,15 +4,23 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/hle/kernel/client_session.h" 6#include "core/hle/kernel/client_session.h"
7#include "core/hle/kernel/k_scoped_resource_reservation.h"
7#include "core/hle/kernel/server_session.h" 8#include "core/hle/kernel/server_session.h"
8#include "core/hle/kernel/session.h" 9#include "core/hle/kernel/session.h"
9 10
10namespace Kernel { 11namespace Kernel {
11 12
12Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {} 13Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
13Session::~Session() = default; 14Session::~Session() {
15 // Release reserved resource when the Session pair was created.
16 kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1);
17}
14 18
15Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { 19Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
20 // Reserve a new session from the resource limit.
21 KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(),
22 LimitableResource::Sessions);
23 ASSERT(session_reservation.Succeeded());
16 auto session{std::make_shared<Session>(kernel)}; 24 auto session{std::make_shared<Session>(kernel)};
17 auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()}; 25 auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()};
18 auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()}; 26 auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()};
@@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
21 session->client = client_session; 29 session->client = client_session;
22 session->server = server_session; 30 session->server = server_session;
23 31
32 session_reservation.Commit();
24 return std::make_pair(std::move(client_session), std::move(server_session)); 33 return std::make_pair(std::move(client_session), std::move(server_session));
25} 34}
26 35
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 0cd467110..2eadd51d7 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/core.h" 6#include "core/core.h"
7#include "core/hle/kernel/k_scoped_resource_reservation.h"
7#include "core/hle/kernel/kernel.h" 8#include "core/hle/kernel/kernel.h"
8#include "core/hle/kernel/memory/page_table.h" 9#include "core/hle/kernel/memory/page_table.h"
9#include "core/hle/kernel/shared_memory.h" 10#include "core/hle/kernel/shared_memory.h"
@@ -13,7 +14,9 @@ namespace Kernel {
13SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) 14SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory)
14 : Object{kernel}, device_memory{device_memory} {} 15 : Object{kernel}, device_memory{device_memory} {}
15 16
16SharedMemory::~SharedMemory() = default; 17SharedMemory::~SharedMemory() {
18 kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size);
19}
17 20
18std::shared_ptr<SharedMemory> SharedMemory::Create( 21std::shared_ptr<SharedMemory> SharedMemory::Create(
19 KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, 22 KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
@@ -21,6 +24,11 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
21 Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, 24 Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size,
22 std::string name) { 25 std::string name) {
23 26
27 const auto resource_limit = kernel.GetSystemResourceLimit();
28 KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
29 size);
30 ASSERT(memory_reservation.Succeeded());
31
24 std::shared_ptr<SharedMemory> shared_memory{ 32 std::shared_ptr<SharedMemory> shared_memory{
25 std::make_shared<SharedMemory>(kernel, device_memory)}; 33 std::make_shared<SharedMemory>(kernel, device_memory)};
26 34
@@ -32,6 +40,7 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
32 shared_memory->size = size; 40 shared_memory->size = size;
33 shared_memory->name = name; 41 shared_memory->name = name;
34 42
43 memory_reservation.Commit();
35 return shared_memory; 44 return shared_memory;
36} 45}
37 46
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 26650a513..31d899e06 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -23,7 +23,6 @@
23#include "core/cpu_manager.h" 23#include "core/cpu_manager.h"
24#include "core/hle/kernel/client_port.h" 24#include "core/hle/kernel/client_port.h"
25#include "core/hle/kernel/client_session.h" 25#include "core/hle/kernel/client_session.h"
26#include "core/hle/kernel/errors.h"
27#include "core/hle/kernel/handle_table.h" 26#include "core/hle/kernel/handle_table.h"
28#include "core/hle/kernel/k_address_arbiter.h" 27#include "core/hle/kernel/k_address_arbiter.h"
29#include "core/hle/kernel/k_condition_variable.h" 28#include "core/hle/kernel/k_condition_variable.h"
@@ -31,6 +30,7 @@
31#include "core/hle/kernel/k_readable_event.h" 30#include "core/hle/kernel/k_readable_event.h"
32#include "core/hle/kernel/k_resource_limit.h" 31#include "core/hle/kernel/k_resource_limit.h"
33#include "core/hle/kernel/k_scheduler.h" 32#include "core/hle/kernel/k_scheduler.h"
33#include "core/hle/kernel/k_scoped_resource_reservation.h"
34#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" 34#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
35#include "core/hle/kernel/k_synchronization_object.h" 35#include "core/hle/kernel/k_synchronization_object.h"
36#include "core/hle/kernel/k_thread.h" 36#include "core/hle/kernel/k_thread.h"
@@ -71,49 +71,49 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
71 VAddr src_addr, u64 size) { 71 VAddr src_addr, u64 size) {
72 if (!Common::Is4KBAligned(dst_addr)) { 72 if (!Common::Is4KBAligned(dst_addr)) {
73 LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr); 73 LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr);
74 return ERR_INVALID_ADDRESS; 74 return ResultInvalidAddress;
75 } 75 }
76 76
77 if (!Common::Is4KBAligned(src_addr)) { 77 if (!Common::Is4KBAligned(src_addr)) {
78 LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr); 78 LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr);
79 return ERR_INVALID_SIZE; 79 return ResultInvalidSize;
80 } 80 }
81 81
82 if (size == 0) { 82 if (size == 0) {
83 LOG_ERROR(Kernel_SVC, "Size is 0"); 83 LOG_ERROR(Kernel_SVC, "Size is 0");
84 return ERR_INVALID_SIZE; 84 return ResultInvalidSize;
85 } 85 }
86 86
87 if (!Common::Is4KBAligned(size)) { 87 if (!Common::Is4KBAligned(size)) {
88 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size); 88 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size);
89 return ERR_INVALID_SIZE; 89 return ResultInvalidSize;
90 } 90 }
91 91
92 if (!IsValidAddressRange(dst_addr, size)) { 92 if (!IsValidAddressRange(dst_addr, size)) {
93 LOG_ERROR(Kernel_SVC, 93 LOG_ERROR(Kernel_SVC,
94 "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}", 94 "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
95 dst_addr, size); 95 dst_addr, size);
96 return ERR_INVALID_ADDRESS_STATE; 96 return ResultInvalidCurrentMemory;
97 } 97 }
98 98
99 if (!IsValidAddressRange(src_addr, size)) { 99 if (!IsValidAddressRange(src_addr, size)) {
100 LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}", 100 LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
101 src_addr, size); 101 src_addr, size);
102 return ERR_INVALID_ADDRESS_STATE; 102 return ResultInvalidCurrentMemory;
103 } 103 }
104 104
105 if (!manager.IsInsideAddressSpace(src_addr, size)) { 105 if (!manager.IsInsideAddressSpace(src_addr, size)) {
106 LOG_ERROR(Kernel_SVC, 106 LOG_ERROR(Kernel_SVC,
107 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 107 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
108 src_addr, size); 108 src_addr, size);
109 return ERR_INVALID_ADDRESS_STATE; 109 return ResultInvalidCurrentMemory;
110 } 110 }
111 111
112 if (manager.IsOutsideStackRegion(dst_addr, size)) { 112 if (manager.IsOutsideStackRegion(dst_addr, size)) {
113 LOG_ERROR(Kernel_SVC, 113 LOG_ERROR(Kernel_SVC,
114 "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", 114 "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}",
115 dst_addr, size); 115 dst_addr, size);
116 return ERR_INVALID_MEMORY_RANGE; 116 return ResultInvalidMemoryRange;
117 } 117 }
118 118
119 if (manager.IsInsideHeapRegion(dst_addr, size)) { 119 if (manager.IsInsideHeapRegion(dst_addr, size)) {
@@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
121 "Destination does not fit within the heap region, addr=0x{:016X}, " 121 "Destination does not fit within the heap region, addr=0x{:016X}, "
122 "size=0x{:016X}", 122 "size=0x{:016X}",
123 dst_addr, size); 123 dst_addr, size);
124 return ERR_INVALID_MEMORY_RANGE; 124 return ResultInvalidMemoryRange;
125 } 125 }
126 126
127 if (manager.IsInsideAliasRegion(dst_addr, size)) { 127 if (manager.IsInsideAliasRegion(dst_addr, size)) {
@@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
129 "Destination does not fit within the map region, addr=0x{:016X}, " 129 "Destination does not fit within the map region, addr=0x{:016X}, "
130 "size=0x{:016X}", 130 "size=0x{:016X}",
131 dst_addr, size); 131 dst_addr, size);
132 return ERR_INVALID_MEMORY_RANGE; 132 return ResultInvalidMemoryRange;
133 } 133 }
134 134
135 return RESULT_SUCCESS; 135 return RESULT_SUCCESS;
@@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
138enum class ResourceLimitValueType { 138enum class ResourceLimitValueType {
139 CurrentValue, 139 CurrentValue,
140 LimitValue, 140 LimitValue,
141 PeakValue,
141}; 142};
142 143
143ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, 144ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
@@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
146 const auto type = static_cast<LimitableResource>(resource_type); 147 const auto type = static_cast<LimitableResource>(resource_type);
147 if (!IsValidResourceType(type)) { 148 if (!IsValidResourceType(type)) {
148 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); 149 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
149 return ERR_INVALID_ENUM_VALUE; 150 return ResultInvalidEnumValue;
150 } 151 }
151 152
152 const auto* const current_process = system.Kernel().CurrentProcess(); 153 const auto* const current_process = system.Kernel().CurrentProcess();
@@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
157 if (!resource_limit_object) { 158 if (!resource_limit_object) {
158 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", 159 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
159 resource_limit); 160 resource_limit);
160 return ERR_INVALID_HANDLE; 161 return ResultInvalidHandle;
161 } 162 }
162 163
163 if (value_type == ResourceLimitValueType::CurrentValue) { 164 switch (value_type) {
165 case ResourceLimitValueType::CurrentValue:
164 return MakeResult(resource_limit_object->GetCurrentValue(type)); 166 return MakeResult(resource_limit_object->GetCurrentValue(type));
167 case ResourceLimitValueType::LimitValue:
168 return MakeResult(resource_limit_object->GetLimitValue(type));
169 case ResourceLimitValueType::PeakValue:
170 return MakeResult(resource_limit_object->GetPeakValue(type));
171 default:
172 LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type);
173 return ResultInvalidEnumValue;
165 } 174 }
166
167 return MakeResult(resource_limit_object->GetLimitValue(type));
168} 175}
169} // Anonymous namespace 176} // Anonymous namespace
170 177
@@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
177 if ((heap_size % 0x200000) != 0) { 184 if ((heap_size % 0x200000) != 0) {
178 LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}", 185 LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
179 heap_size); 186 heap_size);
180 return ERR_INVALID_SIZE; 187 return ResultInvalidSize;
181 } 188 }
182 189
183 if (heap_size >= 0x200000000) { 190 if (heap_size >= 0x200000000) {
184 LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size); 191 LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
185 return ERR_INVALID_SIZE; 192 return ResultInvalidSize;
186 } 193 }
187 194
188 auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; 195 auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -208,19 +215,19 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
208 215
209 if (!Common::Is4KBAligned(address)) { 216 if (!Common::Is4KBAligned(address)) {
210 LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address); 217 LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address);
211 return ERR_INVALID_ADDRESS; 218 return ResultInvalidAddress;
212 } 219 }
213 220
214 if (size == 0 || !Common::Is4KBAligned(size)) { 221 if (size == 0 || !Common::Is4KBAligned(size)) {
215 LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.", 222 LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.",
216 size); 223 size);
217 return ERR_INVALID_ADDRESS; 224 return ResultInvalidAddress;
218 } 225 }
219 226
220 if (!IsValidAddressRange(address, size)) { 227 if (!IsValidAddressRange(address, size)) {
221 LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})", 228 LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})",
222 address, size); 229 address, size);
223 return ERR_INVALID_ADDRESS_STATE; 230 return ResultInvalidCurrentMemory;
224 } 231 }
225 232
226 const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)}; 233 const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)};
@@ -229,7 +236,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
229 LOG_ERROR(Kernel_SVC, 236 LOG_ERROR(Kernel_SVC,
230 "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}", 237 "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}",
231 attribute, mask); 238 attribute, mask);
232 return ERR_INVALID_COMBINATION; 239 return ResultInvalidCombination;
233 } 240 }
234 241
235 auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; 242 auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
293 LOG_ERROR(Kernel_SVC, 300 LOG_ERROR(Kernel_SVC,
294 "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", 301 "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
295 port_name_address); 302 port_name_address);
296 return ERR_NOT_FOUND; 303 return ResultNotFound;
297 } 304 }
298 305
299 static constexpr std::size_t PortNameMaxLength = 11; 306 static constexpr std::size_t PortNameMaxLength = 11;
@@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
302 if (port_name.size() > PortNameMaxLength) { 309 if (port_name.size() > PortNameMaxLength) {
303 LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength, 310 LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength,
304 port_name.size()); 311 port_name.size());
305 return ERR_OUT_OF_RANGE; 312 return ResultOutOfRange;
306 } 313 }
307 314
308 LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); 315 LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
@@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
311 const auto it = kernel.FindNamedPort(port_name); 318 const auto it = kernel.FindNamedPort(port_name);
312 if (!kernel.IsValidNamedPort(it)) { 319 if (!kernel.IsValidNamedPort(it)) {
313 LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); 320 LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
314 return ERR_NOT_FOUND; 321 return ResultNotFound;
315 } 322 }
316 323
317 ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1));
318
319 auto client_port = it->second; 324 auto client_port = it->second;
320 325
321 std::shared_ptr<ClientSession> client_session; 326 std::shared_ptr<ClientSession> client_session;
@@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
340 std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); 345 std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
341 if (!session) { 346 if (!session) {
342 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); 347 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
343 return ERR_INVALID_HANDLE; 348 return ResultInvalidHandle;
344 } 349 }
345 350
346 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); 351 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
@@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
405 const Process* const owner_process = thread->GetOwnerProcess(); 410 const Process* const owner_process = thread->GetOwnerProcess();
406 if (!owner_process) { 411 if (!owner_process) {
407 LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered."); 412 LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered.");
408 return ERR_INVALID_HANDLE; 413 return ResultInvalidHandle;
409 } 414 }
410 415
411 *process_id = owner_process->GetProcessID(); 416 *process_id = owner_process->GetProcessID();
@@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
415 // NOTE: This should also handle debug objects before returning. 420 // NOTE: This should also handle debug objects before returning.
416 421
417 LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle); 422 LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle);
418 return ERR_INVALID_HANDLE; 423 return ResultInvalidHandle;
419} 424}
420 425
421static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, 426static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
@@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
438 LOG_ERROR(Kernel_SVC, 443 LOG_ERROR(Kernel_SVC,
439 "Handle address is not a valid virtual address, handle_address=0x{:016X}", 444 "Handle address is not a valid virtual address, handle_address=0x{:016X}",
440 handles_address); 445 handles_address);
441 return ERR_INVALID_POINTER; 446 return ResultInvalidPointer;
442 } 447 }
443 448
444 static constexpr u64 MaxHandles = 0x40; 449 static constexpr u64 MaxHandles = 0x40;
@@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
446 if (handle_count > MaxHandles) { 451 if (handle_count > MaxHandles) {
447 LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}", 452 LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}",
448 MaxHandles, handle_count); 453 MaxHandles, handle_count);
449 return ERR_OUT_OF_RANGE; 454 return ResultOutOfRange;
450 } 455 }
451 456
452 auto& kernel = system.Kernel(); 457 auto& kernel = system.Kernel();
@@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
459 464
460 if (object == nullptr) { 465 if (object == nullptr) {
461 LOG_ERROR(Kernel_SVC, "Object is a nullptr"); 466 LOG_ERROR(Kernel_SVC, "Object is a nullptr");
462 return ERR_INVALID_HANDLE; 467 return ResultInvalidHandle;
463 } 468 }
464 469
465 objects[i] = object.get(); 470 objects[i] = object.get();
@@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
481 // Get the thread from its handle. 486 // Get the thread from its handle.
482 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 487 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
483 std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle); 488 std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle);
489
484 if (!thread) { 490 if (!thread) {
485 LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle); 491 LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle);
486 return ResultInvalidHandle; 492 return ResultInvalidHandle;
@@ -525,6 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
525 LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address); 531 LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);
526 532
527 // Validate the input address. 533 // Validate the input address.
534
528 if (Memory::IsKernelAddress(address)) { 535 if (Memory::IsKernelAddress(address)) {
529 LOG_ERROR(Kernel_SVC, 536 LOG_ERROR(Kernel_SVC,
530 "Attempting to arbitrate an unlock on a kernel address (address={:08X})", 537 "Attempting to arbitrate an unlock on a kernel address (address={:08X})",
@@ -735,7 +742,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
735 if (info_sub_id != 0) { 742 if (info_sub_id != 0) {
736 LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, 743 LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
737 info_sub_id); 744 info_sub_id);
738 return ERR_INVALID_ENUM_VALUE; 745 return ResultInvalidEnumValue;
739 } 746 }
740 747
741 const auto& current_process_handle_table = 748 const auto& current_process_handle_table =
@@ -744,7 +751,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
744 if (!process) { 751 if (!process) {
745 LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}", 752 LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}",
746 info_id, info_sub_id, handle); 753 info_id, info_sub_id, handle);
747 return ERR_INVALID_HANDLE; 754 return ResultInvalidHandle;
748 } 755 }
749 756
750 switch (info_id_type) { 757 switch (info_id_type) {
@@ -826,7 +833,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
826 } 833 }
827 834
828 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); 835 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
829 return ERR_INVALID_ENUM_VALUE; 836 return ResultInvalidEnumValue;
830 } 837 }
831 838
832 case GetInfoType::IsCurrentProcessBeingDebugged: 839 case GetInfoType::IsCurrentProcessBeingDebugged:
@@ -836,13 +843,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
836 case GetInfoType::RegisterResourceLimit: { 843 case GetInfoType::RegisterResourceLimit: {
837 if (handle != 0) { 844 if (handle != 0) {
838 LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle); 845 LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle);
839 return ERR_INVALID_HANDLE; 846 return ResultInvalidHandle;
840 } 847 }
841 848
842 if (info_sub_id != 0) { 849 if (info_sub_id != 0) {
843 LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, 850 LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
844 info_sub_id); 851 info_sub_id);
845 return ERR_INVALID_COMBINATION; 852 return ResultInvalidCombination;
846 } 853 }
847 854
848 Process* const current_process = system.Kernel().CurrentProcess(); 855 Process* const current_process = system.Kernel().CurrentProcess();
@@ -867,13 +874,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
867 if (handle != 0) { 874 if (handle != 0) {
868 LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}", 875 LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}",
869 handle); 876 handle);
870 return ERR_INVALID_HANDLE; 877 return ResultInvalidHandle;
871 } 878 }
872 879
873 if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) { 880 if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) {
874 LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}", 881 LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}",
875 Process::RANDOM_ENTROPY_SIZE, info_sub_id); 882 Process::RANDOM_ENTROPY_SIZE, info_sub_id);
876 return ERR_INVALID_COMBINATION; 883 return ResultInvalidCombination;
877 } 884 }
878 885
879 *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id); 886 *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
@@ -890,7 +897,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
890 if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) { 897 if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
891 LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus, 898 LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus,
892 info_sub_id); 899 info_sub_id);
893 return ERR_INVALID_COMBINATION; 900 return ResultInvalidCombination;
894 } 901 }
895 902
896 const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>( 903 const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>(
@@ -898,7 +905,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
898 if (!thread) { 905 if (!thread) {
899 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", 906 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
900 static_cast<Handle>(handle)); 907 static_cast<Handle>(handle));
901 return ERR_INVALID_HANDLE; 908 return ResultInvalidHandle;
902 } 909 }
903 910
904 const auto& core_timing = system.CoreTiming(); 911 const auto& core_timing = system.CoreTiming();
@@ -922,7 +929,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
922 929
923 default: 930 default:
924 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); 931 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
925 return ERR_INVALID_ENUM_VALUE; 932 return ResultInvalidEnumValue;
926 } 933 }
927} 934}
928 935
@@ -945,22 +952,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
945 952
946 if (!Common::Is4KBAligned(addr)) { 953 if (!Common::Is4KBAligned(addr)) {
947 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); 954 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
948 return ERR_INVALID_ADDRESS; 955 return ResultInvalidAddress;
949 } 956 }
950 957
951 if (!Common::Is4KBAligned(size)) { 958 if (!Common::Is4KBAligned(size)) {
952 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); 959 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
953 return ERR_INVALID_SIZE; 960 return ResultInvalidSize;
954 } 961 }
955 962
956 if (size == 0) { 963 if (size == 0) {
957 LOG_ERROR(Kernel_SVC, "Size is zero"); 964 LOG_ERROR(Kernel_SVC, "Size is zero");
958 return ERR_INVALID_SIZE; 965 return ResultInvalidSize;
959 } 966 }
960 967
961 if (!(addr < addr + size)) { 968 if (!(addr < addr + size)) {
962 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); 969 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
963 return ERR_INVALID_MEMORY_RANGE; 970 return ResultInvalidMemoryRange;
964 } 971 }
965 972
966 Process* const current_process{system.Kernel().CurrentProcess()}; 973 Process* const current_process{system.Kernel().CurrentProcess()};
@@ -968,21 +975,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
968 975
969 if (current_process->GetSystemResourceSize() == 0) { 976 if (current_process->GetSystemResourceSize() == 0) {
970 LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); 977 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
971 return ERR_INVALID_STATE; 978 return ResultInvalidState;
972 } 979 }
973 980
974 if (!page_table.IsInsideAddressSpace(addr, size)) { 981 if (!page_table.IsInsideAddressSpace(addr, size)) {
975 LOG_ERROR(Kernel_SVC, 982 LOG_ERROR(Kernel_SVC,
976 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 983 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
977 size); 984 size);
978 return ERR_INVALID_MEMORY_RANGE; 985 return ResultInvalidMemoryRange;
979 } 986 }
980 987
981 if (page_table.IsOutsideAliasRegion(addr, size)) { 988 if (page_table.IsOutsideAliasRegion(addr, size)) {
982 LOG_ERROR(Kernel_SVC, 989 LOG_ERROR(Kernel_SVC,
983 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, 990 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
984 size); 991 size);
985 return ERR_INVALID_MEMORY_RANGE; 992 return ResultInvalidMemoryRange;
986 } 993 }
987 994
988 return page_table.MapPhysicalMemory(addr, size); 995 return page_table.MapPhysicalMemory(addr, size);
@@ -999,22 +1006,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
999 1006
1000 if (!Common::Is4KBAligned(addr)) { 1007 if (!Common::Is4KBAligned(addr)) {
1001 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); 1008 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
1002 return ERR_INVALID_ADDRESS; 1009 return ResultInvalidAddress;
1003 } 1010 }
1004 1011
1005 if (!Common::Is4KBAligned(size)) { 1012 if (!Common::Is4KBAligned(size)) {
1006 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); 1013 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
1007 return ERR_INVALID_SIZE; 1014 return ResultInvalidSize;
1008 } 1015 }
1009 1016
1010 if (size == 0) { 1017 if (size == 0) {
1011 LOG_ERROR(Kernel_SVC, "Size is zero"); 1018 LOG_ERROR(Kernel_SVC, "Size is zero");
1012 return ERR_INVALID_SIZE; 1019 return ResultInvalidSize;
1013 } 1020 }
1014 1021
1015 if (!(addr < addr + size)) { 1022 if (!(addr < addr + size)) {
1016 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); 1023 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
1017 return ERR_INVALID_MEMORY_RANGE; 1024 return ResultInvalidMemoryRange;
1018 } 1025 }
1019 1026
1020 Process* const current_process{system.Kernel().CurrentProcess()}; 1027 Process* const current_process{system.Kernel().CurrentProcess()};
@@ -1022,21 +1029,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
1022 1029
1023 if (current_process->GetSystemResourceSize() == 0) { 1030 if (current_process->GetSystemResourceSize() == 0) {
1024 LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); 1031 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
1025 return ERR_INVALID_STATE; 1032 return ResultInvalidState;
1026 } 1033 }
1027 1034
1028 if (!page_table.IsInsideAddressSpace(addr, size)) { 1035 if (!page_table.IsInsideAddressSpace(addr, size)) {
1029 LOG_ERROR(Kernel_SVC, 1036 LOG_ERROR(Kernel_SVC,
1030 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 1037 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
1031 size); 1038 size);
1032 return ERR_INVALID_MEMORY_RANGE; 1039 return ResultInvalidMemoryRange;
1033 } 1040 }
1034 1041
1035 if (page_table.IsOutsideAliasRegion(addr, size)) { 1042 if (page_table.IsOutsideAliasRegion(addr, size)) {
1036 LOG_ERROR(Kernel_SVC, 1043 LOG_ERROR(Kernel_SVC,
1037 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, 1044 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
1038 size); 1045 size);
1039 return ERR_INVALID_MEMORY_RANGE; 1046 return ResultInvalidMemoryRange;
1040 } 1047 }
1041 1048
1042 return page_table.UnmapPhysicalMemory(addr, size); 1049 return page_table.UnmapPhysicalMemory(addr, size);
@@ -1206,23 +1213,23 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1206 1213
1207 if (!Common::Is4KBAligned(addr)) { 1214 if (!Common::Is4KBAligned(addr)) {
1208 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); 1215 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr);
1209 return ERR_INVALID_ADDRESS; 1216 return ResultInvalidAddress;
1210 } 1217 }
1211 1218
1212 if (size == 0) { 1219 if (size == 0) {
1213 LOG_ERROR(Kernel_SVC, "Size is 0"); 1220 LOG_ERROR(Kernel_SVC, "Size is 0");
1214 return ERR_INVALID_SIZE; 1221 return ResultInvalidSize;
1215 } 1222 }
1216 1223
1217 if (!Common::Is4KBAligned(size)) { 1224 if (!Common::Is4KBAligned(size)) {
1218 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); 1225 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size);
1219 return ERR_INVALID_SIZE; 1226 return ResultInvalidSize;
1220 } 1227 }
1221 1228
1222 if (!IsValidAddressRange(addr, size)) { 1229 if (!IsValidAddressRange(addr, size)) {
1223 LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", 1230 LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
1224 addr, size); 1231 addr, size);
1225 return ERR_INVALID_ADDRESS_STATE; 1232 return ResultInvalidCurrentMemory;
1226 } 1233 }
1227 1234
1228 const auto permission_type = static_cast<Memory::MemoryPermission>(permissions); 1235 const auto permission_type = static_cast<Memory::MemoryPermission>(permissions);
@@ -1230,7 +1237,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1230 Memory::MemoryPermission::ReadAndWrite) { 1237 Memory::MemoryPermission::ReadAndWrite) {
1231 LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}", 1238 LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}",
1232 permissions); 1239 permissions);
1233 return ERR_INVALID_MEMORY_PERMISSIONS; 1240 return ResultInvalidMemoryPermissions;
1234 } 1241 }
1235 1242
1236 auto* const current_process{system.Kernel().CurrentProcess()}; 1243 auto* const current_process{system.Kernel().CurrentProcess()};
@@ -1241,7 +1248,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1241 "Addr does not fit within the valid region, addr=0x{:016X}, " 1248 "Addr does not fit within the valid region, addr=0x{:016X}, "
1242 "size=0x{:016X}", 1249 "size=0x{:016X}",
1243 addr, size); 1250 addr, size);
1244 return ERR_INVALID_MEMORY_RANGE; 1251 return ResultInvalidMemoryRange;
1245 } 1252 }
1246 1253
1247 if (page_table.IsInsideHeapRegion(addr, size)) { 1254 if (page_table.IsInsideHeapRegion(addr, size)) {
@@ -1249,7 +1256,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1249 "Addr does not fit within the heap region, addr=0x{:016X}, " 1256 "Addr does not fit within the heap region, addr=0x{:016X}, "
1250 "size=0x{:016X}", 1257 "size=0x{:016X}",
1251 addr, size); 1258 addr, size);
1252 return ERR_INVALID_MEMORY_RANGE; 1259 return ResultInvalidMemoryRange;
1253 } 1260 }
1254 1261
1255 if (page_table.IsInsideAliasRegion(addr, size)) { 1262 if (page_table.IsInsideAliasRegion(addr, size)) {
@@ -1257,14 +1264,14 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1257 "Address does not fit within the map region, addr=0x{:016X}, " 1264 "Address does not fit within the map region, addr=0x{:016X}, "
1258 "size=0x{:016X}", 1265 "size=0x{:016X}",
1259 addr, size); 1266 addr, size);
1260 return ERR_INVALID_MEMORY_RANGE; 1267 return ResultInvalidMemoryRange;
1261 } 1268 }
1262 1269
1263 auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)}; 1270 auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)};
1264 if (!shared_memory) { 1271 if (!shared_memory) {
1265 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", 1272 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
1266 shared_memory_handle); 1273 shared_memory_handle);
1267 return ERR_INVALID_HANDLE; 1274 return ResultInvalidHandle;
1268 } 1275 }
1269 1276
1270 return shared_memory->Map(*current_process, addr, size, permission_type); 1277 return shared_memory->Map(*current_process, addr, size, permission_type);
@@ -1285,7 +1292,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add
1285 if (!process) { 1292 if (!process) {
1286 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", 1293 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
1287 process_handle); 1294 process_handle);
1288 return ERR_INVALID_HANDLE; 1295 return ResultInvalidHandle;
1289 } 1296 }
1290 1297
1291 auto& memory{system.Memory()}; 1298 auto& memory{system.Memory()};
@@ -1332,18 +1339,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1332 if (!Common::Is4KBAligned(src_address)) { 1339 if (!Common::Is4KBAligned(src_address)) {
1333 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", 1340 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
1334 src_address); 1341 src_address);
1335 return ERR_INVALID_ADDRESS; 1342 return ResultInvalidAddress;
1336 } 1343 }
1337 1344
1338 if (!Common::Is4KBAligned(dst_address)) { 1345 if (!Common::Is4KBAligned(dst_address)) {
1339 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", 1346 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
1340 dst_address); 1347 dst_address);
1341 return ERR_INVALID_ADDRESS; 1348 return ResultInvalidAddress;
1342 } 1349 }
1343 1350
1344 if (size == 0 || !Common::Is4KBAligned(size)) { 1351 if (size == 0 || !Common::Is4KBAligned(size)) {
1345 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); 1352 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
1346 return ERR_INVALID_SIZE; 1353 return ResultInvalidSize;
1347 } 1354 }
1348 1355
1349 if (!IsValidAddressRange(dst_address, size)) { 1356 if (!IsValidAddressRange(dst_address, size)) {
@@ -1351,7 +1358,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1351 "Destination address range overflows the address space (dst_address=0x{:016X}, " 1358 "Destination address range overflows the address space (dst_address=0x{:016X}, "
1352 "size=0x{:016X}).", 1359 "size=0x{:016X}).",
1353 dst_address, size); 1360 dst_address, size);
1354 return ERR_INVALID_ADDRESS_STATE; 1361 return ResultInvalidCurrentMemory;
1355 } 1362 }
1356 1363
1357 if (!IsValidAddressRange(src_address, size)) { 1364 if (!IsValidAddressRange(src_address, size)) {
@@ -1359,7 +1366,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1359 "Source address range overflows the address space (src_address=0x{:016X}, " 1366 "Source address range overflows the address space (src_address=0x{:016X}, "
1360 "size=0x{:016X}).", 1367 "size=0x{:016X}).",
1361 src_address, size); 1368 src_address, size);
1362 return ERR_INVALID_ADDRESS_STATE; 1369 return ResultInvalidCurrentMemory;
1363 } 1370 }
1364 1371
1365 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 1372 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1367,7 +1374,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1367 if (!process) { 1374 if (!process) {
1368 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", 1375 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
1369 process_handle); 1376 process_handle);
1370 return ERR_INVALID_HANDLE; 1377 return ResultInvalidHandle;
1371 } 1378 }
1372 1379
1373 auto& page_table = process->PageTable(); 1380 auto& page_table = process->PageTable();
@@ -1376,7 +1383,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1376 "Source address range is not within the address space (src_address=0x{:016X}, " 1383 "Source address range is not within the address space (src_address=0x{:016X}, "
1377 "size=0x{:016X}).", 1384 "size=0x{:016X}).",
1378 src_address, size); 1385 src_address, size);
1379 return ERR_INVALID_ADDRESS_STATE; 1386 return ResultInvalidCurrentMemory;
1380 } 1387 }
1381 1388
1382 if (!page_table.IsInsideASLRRegion(dst_address, size)) { 1389 if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1384,7 +1391,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1384 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " 1391 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
1385 "size=0x{:016X}).", 1392 "size=0x{:016X}).",
1386 dst_address, size); 1393 dst_address, size);
1387 return ERR_INVALID_MEMORY_RANGE; 1394 return ResultInvalidMemoryRange;
1388 } 1395 }
1389 1396
1390 return page_table.MapProcessCodeMemory(dst_address, src_address, size); 1397 return page_table.MapProcessCodeMemory(dst_address, src_address, size);
@@ -1400,18 +1407,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1400 if (!Common::Is4KBAligned(dst_address)) { 1407 if (!Common::Is4KBAligned(dst_address)) {
1401 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", 1408 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
1402 dst_address); 1409 dst_address);
1403 return ERR_INVALID_ADDRESS; 1410 return ResultInvalidAddress;
1404 } 1411 }
1405 1412
1406 if (!Common::Is4KBAligned(src_address)) { 1413 if (!Common::Is4KBAligned(src_address)) {
1407 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", 1414 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
1408 src_address); 1415 src_address);
1409 return ERR_INVALID_ADDRESS; 1416 return ResultInvalidAddress;
1410 } 1417 }
1411 1418
1412 if (size == 0 || Common::Is4KBAligned(size)) { 1419 if (size == 0 || Common::Is4KBAligned(size)) {
1413 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); 1420 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
1414 return ERR_INVALID_SIZE; 1421 return ResultInvalidSize;
1415 } 1422 }
1416 1423
1417 if (!IsValidAddressRange(dst_address, size)) { 1424 if (!IsValidAddressRange(dst_address, size)) {
@@ -1419,7 +1426,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1419 "Destination address range overflows the address space (dst_address=0x{:016X}, " 1426 "Destination address range overflows the address space (dst_address=0x{:016X}, "
1420 "size=0x{:016X}).", 1427 "size=0x{:016X}).",
1421 dst_address, size); 1428 dst_address, size);
1422 return ERR_INVALID_ADDRESS_STATE; 1429 return ResultInvalidCurrentMemory;
1423 } 1430 }
1424 1431
1425 if (!IsValidAddressRange(src_address, size)) { 1432 if (!IsValidAddressRange(src_address, size)) {
@@ -1427,7 +1434,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1427 "Source address range overflows the address space (src_address=0x{:016X}, " 1434 "Source address range overflows the address space (src_address=0x{:016X}, "
1428 "size=0x{:016X}).", 1435 "size=0x{:016X}).",
1429 src_address, size); 1436 src_address, size);
1430 return ERR_INVALID_ADDRESS_STATE; 1437 return ResultInvalidCurrentMemory;
1431 } 1438 }
1432 1439
1433 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 1440 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1435,7 +1442,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1435 if (!process) { 1442 if (!process) {
1436 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", 1443 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
1437 process_handle); 1444 process_handle);
1438 return ERR_INVALID_HANDLE; 1445 return ResultInvalidHandle;
1439 } 1446 }
1440 1447
1441 auto& page_table = process->PageTable(); 1448 auto& page_table = process->PageTable();
@@ -1444,7 +1451,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1444 "Source address range is not within the address space (src_address=0x{:016X}, " 1451 "Source address range is not within the address space (src_address=0x{:016X}, "
1445 "size=0x{:016X}).", 1452 "size=0x{:016X}).",
1446 src_address, size); 1453 src_address, size);
1447 return ERR_INVALID_ADDRESS_STATE; 1454 return ResultInvalidCurrentMemory;
1448 } 1455 }
1449 1456
1450 if (!page_table.IsInsideASLRRegion(dst_address, size)) { 1457 if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1452,7 +1459,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1452 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " 1459 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
1453 "size=0x{:016X}).", 1460 "size=0x{:016X}).",
1454 dst_address, size); 1461 dst_address, size);
1455 return ERR_INVALID_MEMORY_RANGE; 1462 return ResultInvalidMemoryRange;
1456 } 1463 }
1457 1464
1458 return page_table.UnmapProcessCodeMemory(dst_address, src_address, size); 1465 return page_table.UnmapProcessCodeMemory(dst_address, src_address, size);
@@ -1515,8 +1522,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
1515 return ResultInvalidPriority; 1522 return ResultInvalidPriority;
1516 } 1523 }
1517 1524
1518 ASSERT(process.GetResourceLimit()->Reserve( 1525 KScopedResourceReservation thread_reservation(
1519 LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000)); 1526 kernel.CurrentProcess(), LimitableResource::Threads, 1,
1527 system.CoreTiming().GetGlobalTimeNs().count() + 100000000);
1528 if (!thread_reservation.Succeeded()) {
1529 LOG_ERROR(Kernel_SVC, "Could not reserve a new thread");
1530 return ResultResourceLimitedExceeded;
1531 }
1520 1532
1521 std::shared_ptr<KThread> thread; 1533 std::shared_ptr<KThread> thread;
1522 { 1534 {
@@ -1536,6 +1548,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
1536 // Set the thread name for debugging purposes. 1548 // Set the thread name for debugging purposes.
1537 thread->SetName( 1549 thread->SetName(
1538 fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); 1550 fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
1551 thread_reservation.Commit();
1539 1552
1540 return RESULT_SUCCESS; 1553 return RESULT_SUCCESS;
1541} 1554}
@@ -1844,7 +1857,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
1844 1857
1845 LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle); 1858 LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle);
1846 1859
1847 return Svc::ResultInvalidHandle; 1860 return ResultInvalidHandle;
1848} 1861}
1849 1862
1850static ResultCode ResetSignal32(Core::System& system, Handle handle) { 1863static ResultCode ResetSignal32(Core::System& system, Handle handle) {
@@ -1860,18 +1873,18 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1860 1873
1861 if (!Common::Is4KBAligned(addr)) { 1874 if (!Common::Is4KBAligned(addr)) {
1862 LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr); 1875 LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
1863 return ERR_INVALID_ADDRESS; 1876 return ResultInvalidAddress;
1864 } 1877 }
1865 1878
1866 if (!Common::Is4KBAligned(size) || size == 0) { 1879 if (!Common::Is4KBAligned(size) || size == 0) {
1867 LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size); 1880 LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
1868 return ERR_INVALID_ADDRESS; 1881 return ResultInvalidAddress;
1869 } 1882 }
1870 1883
1871 if (!IsValidAddressRange(addr, size)) { 1884 if (!IsValidAddressRange(addr, size)) {
1872 LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})", 1885 LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
1873 addr, size); 1886 addr, size);
1874 return ERR_INVALID_ADDRESS_STATE; 1887 return ResultInvalidCurrentMemory;
1875 } 1888 }
1876 1889
1877 const auto perms{static_cast<Memory::MemoryPermission>(permissions)}; 1890 const auto perms{static_cast<Memory::MemoryPermission>(permissions)};
@@ -1879,10 +1892,17 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1879 perms == Memory::MemoryPermission::Write) { 1892 perms == Memory::MemoryPermission::Write) {
1880 LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})", 1893 LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
1881 permissions); 1894 permissions);
1882 return ERR_INVALID_MEMORY_PERMISSIONS; 1895 return ResultInvalidMemoryPermissions;
1883 } 1896 }
1884 1897
1885 auto& kernel = system.Kernel(); 1898 auto& kernel = system.Kernel();
1899 // Reserve a new transfer memory from the process resource limit.
1900 KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(),
1901 LimitableResource::TransferMemory);
1902 if (!trmem_reservation.Succeeded()) {
1903 LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory");
1904 return ResultResourceLimitedExceeded;
1905 }
1886 auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms); 1906 auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
1887 1907
1888 if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) { 1908 if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
@@ -1894,6 +1914,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1894 if (result.Failed()) { 1914 if (result.Failed()) {
1895 return result.Code(); 1915 return result.Code();
1896 } 1916 }
1917 trmem_reservation.Commit();
1897 1918
1898 *handle = *result; 1919 *handle = *result;
1899 return RESULT_SUCCESS; 1920 return RESULT_SUCCESS;
@@ -1989,7 +2010,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
1989 LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw); 2010 LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw);
1990 return set_result; 2011 return set_result;
1991 } 2012 }
1992
1993 return RESULT_SUCCESS; 2013 return RESULT_SUCCESS;
1994} 2014}
1995 2015
@@ -2002,8 +2022,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle
2002static ResultCode SignalEvent(Core::System& system, Handle event_handle) { 2022static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
2003 LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle); 2023 LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle);
2004 2024
2025 auto& kernel = system.Kernel();
2005 // Get the current handle table. 2026 // Get the current handle table.
2006 const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 2027 const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
2028
2029 // Reserve a new event from the process resource limit.
2030 KScopedResourceReservation event_reservation(kernel.CurrentProcess(),
2031 LimitableResource::Events);
2032 if (!event_reservation.Succeeded()) {
2033 LOG_ERROR(Kernel, "Could not reserve a new event");
2034 return ResultResourceLimitedExceeded;
2035 }
2007 2036
2008 // Get the writable event. 2037 // Get the writable event.
2009 auto writable_event = handle_table.Get<KWritableEvent>(event_handle); 2038 auto writable_event = handle_table.Get<KWritableEvent>(event_handle);
@@ -2012,6 +2041,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
2012 return ResultInvalidHandle; 2041 return ResultInvalidHandle;
2013 } 2042 }
2014 2043
2044 // Commit the successfuly reservation.
2045 event_reservation.Commit();
2046
2015 return writable_event->Signal(); 2047 return writable_event->Signal();
2016} 2048}
2017 2049
@@ -2043,7 +2075,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) {
2043 2075
2044 LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle); 2076 LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle);
2045 2077
2046 return Svc::ResultInvalidHandle; 2078 return ResultInvalidHandle;
2047} 2079}
2048 2080
2049static ResultCode ClearEvent32(Core::System& system, Handle event_handle) { 2081static ResultCode ClearEvent32(Core::System& system, Handle event_handle) {
@@ -2106,13 +2138,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
2106 if (!process) { 2138 if (!process) {
2107 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", 2139 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
2108 process_handle); 2140 process_handle);
2109 return ERR_INVALID_HANDLE; 2141 return ResultInvalidHandle;
2110 } 2142 }
2111 2143
2112 const auto info_type = static_cast<InfoType>(type); 2144 const auto info_type = static_cast<InfoType>(type);
2113 if (info_type != InfoType::Status) { 2145 if (info_type != InfoType::Status) {
2114 LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type); 2146 LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type);
2115 return ERR_INVALID_ENUM_VALUE; 2147 return ResultInvalidEnumValue;
2116 } 2148 }
2117 2149
2118 *out = static_cast<u64>(process->GetStatus()); 2150 *out = static_cast<u64>(process->GetStatus());
@@ -2174,7 +2206,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
2174 const auto type = static_cast<LimitableResource>(resource_type); 2206 const auto type = static_cast<LimitableResource>(resource_type);
2175 if (!IsValidResourceType(type)) { 2207 if (!IsValidResourceType(type)) {
2176 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); 2208 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
2177 return ERR_INVALID_ENUM_VALUE; 2209 return ResultInvalidEnumValue;
2178 } 2210 }
2179 2211
2180 auto* const current_process = system.Kernel().CurrentProcess(); 2212 auto* const current_process = system.Kernel().CurrentProcess();
@@ -2185,16 +2217,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
2185 if (!resource_limit_object) { 2217 if (!resource_limit_object) {
2186 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", 2218 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
2187 resource_limit); 2219 resource_limit);
2188 return ERR_INVALID_HANDLE; 2220 return ResultInvalidHandle;
2189 } 2221 }
2190 2222
2191 const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); 2223 const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
2192 if (set_result.IsError()) { 2224 if (set_result.IsError()) {
2193 LOG_ERROR( 2225 LOG_ERROR(Kernel_SVC,
2194 Kernel_SVC, 2226 "Attempted to lower resource limit ({}) for category '{}' below its current "
2195 "Attempted to lower resource limit ({}) for category '{}' below its current value ({})", 2227 "value ({})",
2196 resource_limit_object->GetLimitValue(type), resource_type, 2228 resource_limit_object->GetLimitValue(type), resource_type,
2197 resource_limit_object->GetCurrentValue(type)); 2229 resource_limit_object->GetCurrentValue(type));
2198 return set_result; 2230 return set_result;
2199 } 2231 }
2200 2232
@@ -2211,7 +2243,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
2211 LOG_ERROR(Kernel_SVC, 2243 LOG_ERROR(Kernel_SVC,
2212 "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}", 2244 "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
2213 out_process_ids_size); 2245 out_process_ids_size);
2214 return ERR_OUT_OF_RANGE; 2246 return ResultOutOfRange;
2215 } 2247 }
2216 2248
2217 const auto& kernel = system.Kernel(); 2249 const auto& kernel = system.Kernel();
@@ -2221,7 +2253,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
2221 out_process_ids, total_copy_size)) { 2253 out_process_ids, total_copy_size)) {
2222 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", 2254 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2223 out_process_ids, out_process_ids + total_copy_size); 2255 out_process_ids, out_process_ids + total_copy_size);
2224 return ERR_INVALID_ADDRESS_STATE; 2256 return ResultInvalidCurrentMemory;
2225 } 2257 }
2226 2258
2227 auto& memory = system.Memory(); 2259 auto& memory = system.Memory();
@@ -2250,7 +2282,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
2250 if ((out_thread_ids_size & 0xF0000000) != 0) { 2282 if ((out_thread_ids_size & 0xF0000000) != 0) {
2251 LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}", 2283 LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
2252 out_thread_ids_size); 2284 out_thread_ids_size);
2253 return ERR_OUT_OF_RANGE; 2285 return ResultOutOfRange;
2254 } 2286 }
2255 2287
2256 const auto* const current_process = system.Kernel().CurrentProcess(); 2288 const auto* const current_process = system.Kernel().CurrentProcess();
@@ -2260,7 +2292,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
2260 !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) { 2292 !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) {
2261 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", 2293 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2262 out_thread_ids, out_thread_ids + total_copy_size); 2294 out_thread_ids, out_thread_ids + total_copy_size);
2263 return ERR_INVALID_ADDRESS_STATE; 2295 return ResultInvalidCurrentMemory;
2264 } 2296 }
2265 2297
2266 auto& memory = system.Memory(); 2298 auto& memory = system.Memory();
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
index 204cd989d..a26d9f2c9 100644
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -6,21 +6,36 @@
6 6
7#include "core/hle/result.h" 7#include "core/hle/result.h"
8 8
9namespace Kernel::Svc { 9namespace Kernel {
10 10
11// Confirmed Switch kernel error codes
12
13constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
14constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14};
11constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57}; 15constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57};
12constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59}; 16constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
17constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101};
13constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102}; 18constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};
14constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103}; 19constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103};
20constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104};
21constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105};
15constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; 22constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
23constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108};
24constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110};
16constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112}; 25constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112};
17constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113}; 26constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113};
18constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114}; 27constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
28constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115};
19constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116}; 29constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116};
20constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117}; 30constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
21constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118}; 31constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
32constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119};
22constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; 33constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120};
34constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121};
23constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122}; 35constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122};
36constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
24constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125}; 37constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
38constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126};
39constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132};
25 40
26} // namespace Kernel::Svc 41} // namespace Kernel
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 765f408c3..6b0fc1591 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/k_resource_limit.h"
5#include "core/hle/kernel/kernel.h" 6#include "core/hle/kernel/kernel.h"
6#include "core/hle/kernel/memory/page_table.h" 7#include "core/hle/kernel/memory/page_table.h"
7#include "core/hle/kernel/process.h" 8#include "core/hle/kernel/process.h"
@@ -17,6 +18,7 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory)
17TransferMemory::~TransferMemory() { 18TransferMemory::~TransferMemory() {
18 // Release memory region when transfer memory is destroyed 19 // Release memory region when transfer memory is destroyed
19 Reset(); 20 Reset();
21 owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1);
20} 22}
21 23
22std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, 24std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel,
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index bb77c2569..8e1fe9438 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1047,20 +1047,21 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
1047 1047
1048 const u64 offset{rp.Pop<u64>()}; 1048 const u64 offset{rp.Pop<u64>()};
1049 const std::vector<u8> data{ctx.ReadBuffer()}; 1049 const std::vector<u8> data{ctx.ReadBuffer()};
1050 const std::size_t size{std::min(data.size(), backing.GetSize() - offset)};
1050 1051
1051 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size()); 1052 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
1052 1053
1053 if (data.size() > backing.GetSize() - offset) { 1054 if (offset > backing.GetSize()) {
1054 LOG_ERROR(Service_AM, 1055 LOG_ERROR(Service_AM,
1055 "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}", 1056 "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
1056 backing.GetSize(), data.size(), offset); 1057 backing.GetSize(), size, offset);
1057 1058
1058 IPC::ResponseBuilder rb{ctx, 2}; 1059 IPC::ResponseBuilder rb{ctx, 2};
1059 rb.Push(ERR_SIZE_OUT_OF_BOUNDS); 1060 rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
1060 return; 1061 return;
1061 } 1062 }
1062 1063
1063 std::memcpy(backing.GetData().data() + offset, data.data(), data.size()); 1064 std::memcpy(backing.GetData().data() + offset, data.data(), size);
1064 1065
1065 IPC::ResponseBuilder rb{ctx, 2}; 1066 IPC::ResponseBuilder rb{ctx, 2};
1066 rb.Push(RESULT_SUCCESS); 1067 rb.Push(RESULT_SUCCESS);
@@ -1070,11 +1071,11 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
1070 IPC::RequestParser rp{ctx}; 1071 IPC::RequestParser rp{ctx};
1071 1072
1072 const u64 offset{rp.Pop<u64>()}; 1073 const u64 offset{rp.Pop<u64>()};
1073 const std::size_t size{ctx.GetWriteBufferSize()}; 1074 const std::size_t size{std::min(ctx.GetWriteBufferSize(), backing.GetSize() - offset)};
1074 1075
1075 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size); 1076 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
1076 1077
1077 if (size > backing.GetSize() - offset) { 1078 if (offset > backing.GetSize()) {
1078 LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}", 1079 LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
1079 backing.GetSize(), size, offset); 1080 backing.GetSize(), size, offset);
1080 1081
diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp
index d7d3ee99a..c2bfe698f 100644
--- a/src/core/hle/service/am/applets/controller.cpp
+++ b/src/core/hle/service/am/applets/controller.cpp
@@ -211,7 +211,8 @@ void Controller::Execute() {
211 case ControllerSupportMode::ShowControllerFirmwareUpdate: 211 case ControllerSupportMode::ShowControllerFirmwareUpdate:
212 UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented", 212 UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
213 controller_private_arg.mode); 213 controller_private_arg.mode);
214 [[fallthrough]]; 214 ConfigurationComplete();
215 break;
215 default: { 216 default: {
216 ConfigurationComplete(); 217 ConfigurationComplete();
217 break; 218 break;
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 3022438b1..79b209c6b 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -121,6 +121,10 @@ void SoftwareKeyboard::ExecuteInteractive() {
121 std::memcpy(&request, data.data(), sizeof(Request)); 121 std::memcpy(&request, data.data(), sizeof(Request));
122 122
123 switch (request) { 123 switch (request) {
124 case Request::Finalize:
125 complete = true;
126 broker.SignalStateChanged();
127 break;
124 case Request::Calc: { 128 case Request::Calc: {
125 broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1})); 129 broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1}));
126 broker.SignalStateChanged(); 130 broker.SignalStateChanged();
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index dbf198345..70b9f3824 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -21,6 +21,7 @@
21 21
22namespace Service::HID { 22namespace Service::HID {
23constexpr s32 HID_JOYSTICK_MAX = 0x7fff; 23constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
24constexpr s32 HID_TRIGGER_MAX = 0x7fff;
24[[maybe_unused]] constexpr s32 HID_JOYSTICK_MIN = -0x7fff; 25[[maybe_unused]] constexpr s32 HID_JOYSTICK_MIN = -0x7fff;
25constexpr std::size_t NPAD_OFFSET = 0x9A00; 26constexpr std::size_t NPAD_OFFSET = 0x9A00;
26constexpr u32 BATTERY_FULL = 2; 27constexpr u32 BATTERY_FULL = 2;
@@ -48,6 +49,8 @@ Controller_NPad::NPadControllerType Controller_NPad::MapSettingsTypeToNPad(
48 return NPadControllerType::JoyRight; 49 return NPadControllerType::JoyRight;
49 case Settings::ControllerType::Handheld: 50 case Settings::ControllerType::Handheld:
50 return NPadControllerType::Handheld; 51 return NPadControllerType::Handheld;
52 case Settings::ControllerType::GameCube:
53 return NPadControllerType::GameCube;
51 default: 54 default:
52 UNREACHABLE(); 55 UNREACHABLE();
53 return NPadControllerType::ProController; 56 return NPadControllerType::ProController;
@@ -67,6 +70,8 @@ Settings::ControllerType Controller_NPad::MapNPadToSettingsType(
67 return Settings::ControllerType::RightJoycon; 70 return Settings::ControllerType::RightJoycon;
68 case NPadControllerType::Handheld: 71 case NPadControllerType::Handheld:
69 return Settings::ControllerType::Handheld; 72 return Settings::ControllerType::Handheld;
73 case NPadControllerType::GameCube:
74 return Settings::ControllerType::GameCube;
70 default: 75 default:
71 UNREACHABLE(); 76 UNREACHABLE();
72 return Settings::ControllerType::ProController; 77 return Settings::ControllerType::ProController;
@@ -209,6 +214,13 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
209 controller.assignment_mode = NpadAssignments::Single; 214 controller.assignment_mode = NpadAssignments::Single;
210 controller.footer_type = AppletFooterUiType::JoyRightHorizontal; 215 controller.footer_type = AppletFooterUiType::JoyRightHorizontal;
211 break; 216 break;
217 case NPadControllerType::GameCube:
218 controller.style_set.gamecube.Assign(1);
219 // The GC Controller behaves like a wired Pro Controller
220 controller.device_type.fullkey.Assign(1);
221 controller.system_properties.is_vertical.Assign(1);
222 controller.system_properties.use_plus.Assign(1);
223 break;
212 case NPadControllerType::Pokeball: 224 case NPadControllerType::Pokeball:
213 controller.style_set.palma.Assign(1); 225 controller.style_set.palma.Assign(1);
214 controller.device_type.palma.Assign(1); 226 controller.device_type.palma.Assign(1);
@@ -259,6 +271,7 @@ void Controller_NPad::OnInit() {
259 style.joycon_right.Assign(1); 271 style.joycon_right.Assign(1);
260 style.joycon_dual.Assign(1); 272 style.joycon_dual.Assign(1);
261 style.fullkey.Assign(1); 273 style.fullkey.Assign(1);
274 style.gamecube.Assign(1);
262 style.palma.Assign(1); 275 style.palma.Assign(1);
263 } 276 }
264 277
@@ -339,6 +352,7 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
339 auto& pad_state = npad_pad_states[controller_idx].pad_states; 352 auto& pad_state = npad_pad_states[controller_idx].pad_states;
340 auto& lstick_entry = npad_pad_states[controller_idx].l_stick; 353 auto& lstick_entry = npad_pad_states[controller_idx].l_stick;
341 auto& rstick_entry = npad_pad_states[controller_idx].r_stick; 354 auto& rstick_entry = npad_pad_states[controller_idx].r_stick;
355 auto& trigger_entry = npad_trigger_states[controller_idx];
342 const auto& button_state = buttons[controller_idx]; 356 const auto& button_state = buttons[controller_idx];
343 const auto& analog_state = sticks[controller_idx]; 357 const auto& analog_state = sticks[controller_idx];
344 const auto [stick_l_x_f, stick_l_y_f] = 358 const auto [stick_l_x_f, stick_l_y_f] =
@@ -404,6 +418,17 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
404 pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus()); 418 pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus());
405 pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus()); 419 pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus());
406 } 420 }
421
422 if (controller_type == NPadControllerType::GameCube) {
423 trigger_entry.l_analog = static_cast<s32>(
424 button_state[ZL - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
425 trigger_entry.r_analog = static_cast<s32>(
426 button_state[ZR - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
427 pad_state.zl.Assign(false);
428 pad_state.zr.Assign(button_state[R - BUTTON_HID_BEGIN]->GetStatus());
429 pad_state.l.Assign(button_state[ZL - BUTTON_HID_BEGIN]->GetStatus());
430 pad_state.r.Assign(button_state[ZR - BUTTON_HID_BEGIN]->GetStatus());
431 }
407} 432}
408 433
409void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, 434void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
@@ -418,6 +443,11 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
418 &npad.joy_left_states, &npad.joy_right_states, &npad.palma_states, 443 &npad.joy_left_states, &npad.joy_right_states, &npad.palma_states,
419 &npad.system_ext_states}; 444 &npad.system_ext_states};
420 445
446 // There is the possibility to have more controllers with analog triggers
447 const std::array<TriggerGeneric*, 1> controller_triggers{
448 &npad.gc_trigger_states,
449 };
450
421 for (auto* main_controller : controller_npads) { 451 for (auto* main_controller : controller_npads) {
422 main_controller->common.entry_count = 16; 452 main_controller->common.entry_count = 16;
423 main_controller->common.total_entry_count = 17; 453 main_controller->common.total_entry_count = 17;
@@ -435,6 +465,21 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
435 cur_entry.timestamp2 = cur_entry.timestamp; 465 cur_entry.timestamp2 = cur_entry.timestamp;
436 } 466 }
437 467
468 for (auto* analog_trigger : controller_triggers) {
469 analog_trigger->entry_count = 16;
470 analog_trigger->total_entry_count = 17;
471
472 const auto& last_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
473
474 analog_trigger->timestamp = core_timing.GetCPUTicks();
475 analog_trigger->last_entry_index = (analog_trigger->last_entry_index + 1) % 17;
476
477 auto& cur_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
478
479 cur_entry.timestamp = last_entry.timestamp + 1;
480 cur_entry.timestamp2 = cur_entry.timestamp;
481 }
482
438 const auto& controller_type = connected_controllers[i].type; 483 const auto& controller_type = connected_controllers[i].type;
439 484
440 if (controller_type == NPadControllerType::None || !connected_controllers[i].is_connected) { 485 if (controller_type == NPadControllerType::None || !connected_controllers[i].is_connected) {
@@ -444,6 +489,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
444 489
445 RequestPadStateUpdate(npad_index); 490 RequestPadStateUpdate(npad_index);
446 auto& pad_state = npad_pad_states[npad_index]; 491 auto& pad_state = npad_pad_states[npad_index];
492 auto& trigger_state = npad_trigger_states[npad_index];
447 493
448 auto& main_controller = 494 auto& main_controller =
449 npad.fullkey_states.npad[npad.fullkey_states.common.last_entry_index]; 495 npad.fullkey_states.npad[npad.fullkey_states.common.last_entry_index];
@@ -456,6 +502,8 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
456 auto& pokeball_entry = npad.palma_states.npad[npad.palma_states.common.last_entry_index]; 502 auto& pokeball_entry = npad.palma_states.npad[npad.palma_states.common.last_entry_index];
457 auto& libnx_entry = 503 auto& libnx_entry =
458 npad.system_ext_states.npad[npad.system_ext_states.common.last_entry_index]; 504 npad.system_ext_states.npad[npad.system_ext_states.common.last_entry_index];
505 auto& trigger_entry =
506 npad.gc_trigger_states.trigger[npad.gc_trigger_states.last_entry_index];
459 507
460 libnx_entry.connection_status.raw = 0; 508 libnx_entry.connection_status.raw = 0;
461 libnx_entry.connection_status.is_connected.Assign(1); 509 libnx_entry.connection_status.is_connected.Assign(1);
@@ -524,6 +572,18 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
524 572
525 libnx_entry.connection_status.is_right_connected.Assign(1); 573 libnx_entry.connection_status.is_right_connected.Assign(1);
526 break; 574 break;
575 case NPadControllerType::GameCube:
576 main_controller.connection_status.raw = 0;
577 main_controller.connection_status.is_connected.Assign(1);
578 main_controller.connection_status.is_wired.Assign(1);
579 main_controller.pad.pad_states.raw = pad_state.pad_states.raw;
580 main_controller.pad.l_stick = pad_state.l_stick;
581 main_controller.pad.r_stick = pad_state.r_stick;
582 trigger_entry.l_analog = trigger_state.l_analog;
583 trigger_entry.r_analog = trigger_state.r_analog;
584
585 libnx_entry.connection_status.is_wired.Assign(1);
586 break;
527 case NPadControllerType::Pokeball: 587 case NPadControllerType::Pokeball:
528 pokeball_entry.connection_status.raw = 0; 588 pokeball_entry.connection_status.raw = 0;
529 pokeball_entry.connection_status.is_connected.Assign(1); 589 pokeball_entry.connection_status.is_connected.Assign(1);
@@ -674,6 +734,7 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
674 right_sixaxis_entry.orientation = motion_devices[1].orientation; 734 right_sixaxis_entry.orientation = motion_devices[1].orientation;
675 } 735 }
676 break; 736 break;
737 case NPadControllerType::GameCube:
677 case NPadControllerType::Pokeball: 738 case NPadControllerType::Pokeball:
678 break; 739 break;
679 } 740 }
@@ -1135,6 +1196,8 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
1135 return style.joycon_left; 1196 return style.joycon_left;
1136 case NPadControllerType::JoyRight: 1197 case NPadControllerType::JoyRight:
1137 return style.joycon_right; 1198 return style.joycon_right;
1199 case NPadControllerType::GameCube:
1200 return style.gamecube;
1138 case NPadControllerType::Pokeball: 1201 case NPadControllerType::Pokeball:
1139 return style.palma; 1202 return style.palma;
1140 default: 1203 default:
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 48bab988c..bc2e6779d 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -51,6 +51,7 @@ public:
51 JoyDual, 51 JoyDual,
52 JoyLeft, 52 JoyLeft,
53 JoyRight, 53 JoyRight,
54 GameCube,
54 Pokeball, 55 Pokeball,
55 }; 56 };
56 57
@@ -60,6 +61,7 @@ public:
60 JoyconDual = 5, 61 JoyconDual = 5,
61 JoyconLeft = 6, 62 JoyconLeft = 6,
62 JoyconRight = 7, 63 JoyconRight = 7,
64 GameCube = 8,
63 Pokeball = 9, 65 Pokeball = 9,
64 MaxNpadType = 10, 66 MaxNpadType = 10,
65 }; 67 };
@@ -389,6 +391,25 @@ private:
389 }; 391 };
390 static_assert(sizeof(SixAxisGeneric) == 0x708, "SixAxisGeneric is an invalid size"); 392 static_assert(sizeof(SixAxisGeneric) == 0x708, "SixAxisGeneric is an invalid size");
391 393
394 struct TriggerState {
395 s64_le timestamp{};
396 s64_le timestamp2{};
397 s32_le l_analog{};
398 s32_le r_analog{};
399 };
400 static_assert(sizeof(TriggerState) == 0x18, "TriggerState is an invalid size");
401
402 struct TriggerGeneric {
403 INSERT_PADDING_BYTES(0x4);
404 s64_le timestamp;
405 INSERT_PADDING_BYTES(0x4);
406 s64_le total_entry_count;
407 s64_le last_entry_index;
408 s64_le entry_count;
409 std::array<TriggerState, 17> trigger{};
410 };
411 static_assert(sizeof(TriggerGeneric) == 0x1C8, "TriggerGeneric is an invalid size");
412
392 struct NPadSystemProperties { 413 struct NPadSystemProperties {
393 union { 414 union {
394 s64_le raw{}; 415 s64_le raw{};
@@ -509,7 +530,9 @@ private:
509 AppletFooterUiType footer_type; 530 AppletFooterUiType footer_type;
510 // nfc_states needs to be checked switchbrew does not match with HW 531 // nfc_states needs to be checked switchbrew does not match with HW
511 NfcXcdHandle nfc_states; 532 NfcXcdHandle nfc_states;
512 INSERT_PADDING_BYTES(0xdef); 533 INSERT_PADDING_BYTES(0x8); // Mutex
534 TriggerGeneric gc_trigger_states;
535 INSERT_PADDING_BYTES(0xc1f);
513 }; 536 };
514 static_assert(sizeof(NPadEntry) == 0x5000, "NPadEntry is an invalid size"); 537 static_assert(sizeof(NPadEntry) == 0x5000, "NPadEntry is an invalid size");
515 538
@@ -560,6 +583,7 @@ private:
560 f32 sixaxis_fusion_parameter2{}; 583 f32 sixaxis_fusion_parameter2{};
561 bool sixaxis_at_rest{true}; 584 bool sixaxis_at_rest{true};
562 std::array<ControllerPad, 10> npad_pad_states{}; 585 std::array<ControllerPad, 10> npad_pad_states{};
586 std::array<TriggerState, 10> npad_trigger_states{};
563 bool is_in_lr_assignment_mode{false}; 587 bool is_in_lr_assignment_mode{false};
564 Core::System& system; 588 Core::System& system;
565}; 589};
diff --git a/src/core/hle/service/ldn/errors.h b/src/core/hle/service/ldn/errors.h
new file mode 100644
index 000000000..a718c5c66
--- /dev/null
+++ b/src/core/hle/service/ldn/errors.h
@@ -0,0 +1,13 @@
1// Copyright 2021 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::LDN {
10
11constexpr ResultCode ERROR_DISABLED{ErrorModule::LDN, 22};
12
13} // namespace Service::LDN
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index ee908f399..c630d93cd 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -6,6 +6,7 @@
6 6
7#include "core/hle/ipc_helpers.h" 7#include "core/hle/ipc_helpers.h"
8#include "core/hle/result.h" 8#include "core/hle/result.h"
9#include "core/hle/service/ldn/errors.h"
9#include "core/hle/service/ldn/ldn.h" 10#include "core/hle/service/ldn/ldn.h"
10#include "core/hle/service/sm/sm.h" 11#include "core/hle/service/sm/sm.h"
11 12
@@ -103,7 +104,7 @@ public:
103 : ServiceFramework{system_, "IUserLocalCommunicationService"} { 104 : ServiceFramework{system_, "IUserLocalCommunicationService"} {
104 // clang-format off 105 // clang-format off
105 static const FunctionInfo functions[] = { 106 static const FunctionInfo functions[] = {
106 {0, nullptr, "GetState"}, 107 {0, &IUserLocalCommunicationService::GetState, "GetState"},
107 {1, nullptr, "GetNetworkInfo"}, 108 {1, nullptr, "GetNetworkInfo"},
108 {2, nullptr, "GetIpv4Address"}, 109 {2, nullptr, "GetIpv4Address"},
109 {3, nullptr, "GetDisconnectReason"}, 110 {3, nullptr, "GetDisconnectReason"},
@@ -138,13 +139,38 @@ public:
138 RegisterHandlers(functions); 139 RegisterHandlers(functions);
139 } 140 }
140 141
141 void Initialize2(Kernel::HLERequestContext& ctx) { 142 void GetState(Kernel::HLERequestContext& ctx) {
142 LOG_WARNING(Service_LDN, "(STUBBED) called"); 143 LOG_WARNING(Service_LDN, "(STUBBED) called");
143 // Result success seem make this services start network and continue. 144
144 // If we just pass result error then it will stop and maybe try again and again. 145 IPC::ResponseBuilder rb{ctx, 3};
146
147 // Indicate a network error, as we do not actually emulate LDN
148 rb.Push(static_cast<u32>(State::Error));
149
150 rb.Push(RESULT_SUCCESS);
151 }
152
153 void Initialize2(Kernel::HLERequestContext& ctx) {
154 LOG_DEBUG(Service_LDN, "called");
155
156 is_initialized = true;
157
145 IPC::ResponseBuilder rb{ctx, 2}; 158 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(RESULT_UNKNOWN); 159 rb.Push(RESULT_SUCCESS);
147 } 160 }
161
162private:
163 enum class State {
164 None,
165 Initialized,
166 AccessPointOpened,
167 AccessPointCreated,
168 StationOpened,
169 StationConnected,
170 Error,
171 };
172
173 bool is_initialized{};
148}; 174};
149 175
150class LDNS final : public ServiceFramework<LDNS> { 176class LDNS final : public ServiceFramework<LDNS> {
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9da786b4e..c724d2554 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -11,10 +11,10 @@
11#include "common/scope_exit.h" 11#include "common/scope_exit.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/hle/ipc_helpers.h" 13#include "core/hle/ipc_helpers.h"
14#include "core/hle/kernel/errors.h"
15#include "core/hle/kernel/memory/page_table.h" 14#include "core/hle/kernel/memory/page_table.h"
16#include "core/hle/kernel/memory/system_control.h" 15#include "core/hle/kernel/memory/system_control.h"
17#include "core/hle/kernel/process.h" 16#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/svc_results.h"
18#include "core/hle/service/ldr/ldr.h" 18#include "core/hle/service/ldr/ldr.h"
19#include "core/hle/service/service.h" 19#include "core/hle/service/service.h"
20#include "core/loader/nro.h" 20#include "core/loader/nro.h"
@@ -330,7 +330,7 @@ public:
330 const VAddr addr{GetRandomMapRegion(page_table, size)}; 330 const VAddr addr{GetRandomMapRegion(page_table, size)};
331 const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)}; 331 const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)};
332 332
333 if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { 333 if (result == Kernel::ResultInvalidCurrentMemory) {
334 continue; 334 continue;
335 } 335 }
336 336
@@ -361,7 +361,7 @@ public:
361 const ResultCode result{ 361 const ResultCode result{
362 page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)}; 362 page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)};
363 363
364 if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { 364 if (result == Kernel::ResultInvalidCurrentMemory) {
365 continue; 365 continue;
366 } 366 }
367 367
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 5d6d25696..2d1d4d67f 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -215,7 +215,7 @@ private:
215 const auto& amiibo = nfp_interface.GetAmiiboBuffer(); 215 const auto& amiibo = nfp_interface.GetAmiiboBuffer();
216 const TagInfo tag_info{ 216 const TagInfo tag_info{
217 .uuid = amiibo.uuid, 217 .uuid = amiibo.uuid,
218 .uuid_length = static_cast<u8>(tag_info.uuid.size()), 218 .uuid_length = static_cast<u8>(amiibo.uuid.size()),
219 .padding_1 = {}, 219 .padding_1 = {},
220 .protocol = 1, // TODO(ogniK): Figure out actual values 220 .protocol = 1, // TODO(ogniK): Figure out actual values
221 .tag_type = 2, 221 .tag_type = 2,
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 0b306b87a..78e9cd708 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -453,7 +453,8 @@ std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protoco
453 return {-1, Errno::MFILE}; 453 return {-1, Errno::MFILE};
454 } 454 }
455 455
456 FileDescriptor& descriptor = file_descriptors[fd].emplace(); 456 file_descriptors[fd] = FileDescriptor{};
457 FileDescriptor& descriptor = *file_descriptors[fd];
457 // ENONMEM might be thrown here 458 // ENONMEM might be thrown here
458 459
459 LOG_INFO(Service, "New socket fd={}", fd); 460 LOG_INFO(Service, "New socket fd={}", fd);
@@ -548,7 +549,8 @@ std::pair<s32, Errno> BSD::AcceptImpl(s32 fd, std::vector<u8>& write_buffer) {
548 return {-1, Translate(bsd_errno)}; 549 return {-1, Translate(bsd_errno)};
549 } 550 }
550 551
551 FileDescriptor& new_descriptor = file_descriptors[new_fd].emplace(); 552 file_descriptors[new_fd] = FileDescriptor{};
553 FileDescriptor& new_descriptor = *file_descriptors[new_fd];
552 new_descriptor.socket = std::move(result.socket); 554 new_descriptor.socket = std::move(result.socket);
553 new_descriptor.is_connection_based = descriptor.is_connection_based; 555 new_descriptor.is_connection_based = descriptor.is_connection_based;
554 556
diff --git a/src/core/settings.h b/src/core/settings.h
index a324530bd..d849dded3 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -181,12 +181,13 @@ struct Values {
181 std::string motion_device; 181 std::string motion_device;
182 std::string udp_input_servers; 182 std::string udp_input_servers;
183 183
184 bool emulate_analog_keyboard; 184 bool mouse_panning;
185 185 float mouse_panning_sensitivity;
186 bool mouse_enabled; 186 bool mouse_enabled;
187 std::string mouse_device; 187 std::string mouse_device;
188 MouseButtonsRaw mouse_buttons; 188 MouseButtonsRaw mouse_buttons;
189 189
190 bool emulate_analog_keyboard;
190 bool keyboard_enabled; 191 bool keyboard_enabled;
191 KeyboardKeysRaw keyboard_keys; 192 KeyboardKeysRaw keyboard_keys;
192 KeyboardModsRaw keyboard_mods; 193 KeyboardModsRaw keyboard_mods;
diff --git a/src/input_common/mouse/mouse_input.cpp b/src/input_common/mouse/mouse_input.cpp
index 10786a541..b864d26f2 100644
--- a/src/input_common/mouse/mouse_input.cpp
+++ b/src/input_common/mouse/mouse_input.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2+ 2// Licensed under GPLv2+
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/settings.h"
5#include "input_common/mouse/mouse_input.h" 6#include "input_common/mouse/mouse_input.h"
6 7
7namespace MouseInput { 8namespace MouseInput {
@@ -32,10 +33,18 @@ void Mouse::UpdateThread() {
32 info.motion.UpdateOrientation(update_time * 1000); 33 info.motion.UpdateOrientation(update_time * 1000);
33 info.tilt_speed = 0; 34 info.tilt_speed = 0;
34 info.data.motion = info.motion.GetMotion(); 35 info.data.motion = info.motion.GetMotion();
36 if (Settings::values.mouse_panning) {
37 info.last_mouse_change *= 0.96f;
38 info.data.axis = {static_cast<int>(16 * info.last_mouse_change.x),
39 static_cast<int>(16 * -info.last_mouse_change.y)};
40 }
35 } 41 }
36 if (configuring) { 42 if (configuring) {
37 UpdateYuzuSettings(); 43 UpdateYuzuSettings();
38 } 44 }
45 if (mouse_panning_timout++ > 20) {
46 StopPanning();
47 }
39 std::this_thread::sleep_for(std::chrono::milliseconds(update_time)); 48 std::this_thread::sleep_for(std::chrono::milliseconds(update_time));
40 } 49 }
41} 50}
@@ -65,8 +74,45 @@ void Mouse::PressButton(int x, int y, int button_) {
65 mouse_info[button_index].data.pressed = true; 74 mouse_info[button_index].data.pressed = true;
66} 75}
67 76
68void Mouse::MouseMove(int x, int y) { 77void Mouse::StopPanning() {
69 for (MouseInfo& info : mouse_info) { 78 for (MouseInfo& info : mouse_info) {
79 if (Settings::values.mouse_panning) {
80 info.data.axis = {};
81 info.tilt_speed = 0;
82 info.last_mouse_change = {};
83 }
84 }
85}
86
87void Mouse::MouseMove(int x, int y, int center_x, int center_y) {
88 for (MouseInfo& info : mouse_info) {
89 if (Settings::values.mouse_panning) {
90 auto mouse_change =
91 (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
92 mouse_panning_timout = 0;
93
94 if (mouse_change.y == 0 && mouse_change.x == 0) {
95 continue;
96 }
97 const auto mouse_change_length = mouse_change.Length();
98 if (mouse_change_length < 3.0f) {
99 mouse_change /= mouse_change_length / 3.0f;
100 }
101
102 info.last_mouse_change = (info.last_mouse_change * 0.91f) + (mouse_change * 0.09f);
103
104 const auto last_mouse_change_length = info.last_mouse_change.Length();
105 if (last_mouse_change_length > 8.0f) {
106 info.last_mouse_change /= last_mouse_change_length / 8.0f;
107 } else if (last_mouse_change_length < 1.0f) {
108 info.last_mouse_change = mouse_change / mouse_change.Length();
109 }
110
111 info.tilt_direction = info.last_mouse_change;
112 info.tilt_speed = info.tilt_direction.Normalize() * info.sensitivity;
113 continue;
114 }
115
70 if (info.data.pressed) { 116 if (info.data.pressed) {
71 const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin; 117 const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin;
72 const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position; 118 const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position;
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h
index 58803c1bf..46aa676c1 100644
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -57,8 +57,10 @@ public:
57 * Signals that mouse has moved. 57 * Signals that mouse has moved.
58 * @param x the x-coordinate of the cursor 58 * @param x the x-coordinate of the cursor
59 * @param y the y-coordinate of the cursor 59 * @param y the y-coordinate of the cursor
60 * @param center_x the x-coordinate of the middle of the screen
61 * @param center_y the y-coordinate of the middle of the screen
60 */ 62 */
61 void MouseMove(int x, int y); 63 void MouseMove(int x, int y, int center_x, int center_y);
62 64
63 /** 65 /**
64 * Signals that a motion sensor tilt has ended. 66 * Signals that a motion sensor tilt has ended.
@@ -74,11 +76,13 @@ public:
74private: 76private:
75 void UpdateThread(); 77 void UpdateThread();
76 void UpdateYuzuSettings(); 78 void UpdateYuzuSettings();
79 void StopPanning();
77 80
78 struct MouseInfo { 81 struct MouseInfo {
79 InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f}; 82 InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f};
80 Common::Vec2<int> mouse_origin; 83 Common::Vec2<int> mouse_origin;
81 Common::Vec2<int> last_mouse_position; 84 Common::Vec2<int> last_mouse_position;
85 Common::Vec2<float> last_mouse_change;
82 bool is_tilting = false; 86 bool is_tilting = false;
83 float sensitivity{0.120f}; 87 float sensitivity{0.120f};
84 88
@@ -94,5 +98,6 @@ private:
94 Common::SPSCQueue<MouseStatus> mouse_queue; 98 Common::SPSCQueue<MouseStatus> mouse_queue;
95 bool configuring{false}; 99 bool configuring{false};
96 bool update_thread_running{true}; 100 bool update_thread_running{true};
101 int mouse_panning_timout{};
97}; 102};
98} // namespace MouseInput 103} // namespace MouseInput
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index 3d799b293..bb56787ee 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -6,6 +6,7 @@
6#include <utility> 6#include <utility>
7 7
8#include "common/threadsafe_queue.h" 8#include "common/threadsafe_queue.h"
9#include "core/settings.h"
9#include "input_common/mouse/mouse_input.h" 10#include "input_common/mouse/mouse_input.h"
10#include "input_common/mouse/mouse_poller.h" 11#include "input_common/mouse/mouse_poller.h"
11 12
@@ -71,7 +72,7 @@ public:
71 std::lock_guard lock{mutex}; 72 std::lock_guard lock{mutex};
72 const auto axis_value = 73 const auto axis_value =
73 static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis)); 74 static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis));
74 return axis_value / (100.0f * range); 75 return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range);
75 } 76 }
76 77
77 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { 78 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index f67de37e3..a88ae452f 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -717,6 +717,13 @@ SDLState::SDLState() {
717 if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) { 717 if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
718 LOG_ERROR(Input, "Failed to set hint for background events with: {}", SDL_GetError()); 718 LOG_ERROR(Input, "Failed to set hint for background events with: {}", SDL_GetError());
719 } 719 }
720// these hints are only defined on sdl2.0.9 or higher
721#if SDL_VERSION_ATLEAST(2, 0, 9)
722#if !SDL_VERSION_ATLEAST(2, 0, 12)
723 // There are also hints to toggle the individual drivers if needed.
724 SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI, "0");
725#endif
726#endif
720 727
721 SDL_AddEventWatch(&SDLEventWatcher, this); 728 SDL_AddEventWatch(&SDLEventWatcher, this);
722 729
diff --git a/src/input_common/settings.h b/src/input_common/settings.h
index 75486554b..a59f5d461 100644
--- a/src/input_common/settings.h
+++ b/src/input_common/settings.h
@@ -340,6 +340,7 @@ enum class ControllerType {
340 LeftJoycon, 340 LeftJoycon,
341 RightJoycon, 341 RightJoycon,
342 Handheld, 342 Handheld,
343 GameCube,
343}; 344};
344 345
345struct PlayerInput { 346struct PlayerInput {
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index 651633e9e..edced69bb 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); 471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); 472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
473} 473}
474
475TEST_CASE("BufferBase: Cached write") {
476 RasterizerInterface rasterizer;
477 BufferBase buffer(rasterizer, c, WORD);
478 buffer.UnmarkRegionAsCpuModified(c, WORD);
479 buffer.CachedCpuWrite(c + PAGE, PAGE);
480 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
481 buffer.FlushCachedWrites();
482 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
483 buffer.MarkRegionAsCpuModified(c, WORD);
484 REQUIRE(rasterizer.Count() == 0);
485}
486
487TEST_CASE("BufferBase: Multiple cached write") {
488 RasterizerInterface rasterizer;
489 BufferBase buffer(rasterizer, c, WORD);
490 buffer.UnmarkRegionAsCpuModified(c, WORD);
491 buffer.CachedCpuWrite(c + PAGE, PAGE);
492 buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
493 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
494 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
495 buffer.FlushCachedWrites();
496 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
497 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
498 buffer.MarkRegionAsCpuModified(c, WORD);
499 REQUIRE(rasterizer.Count() == 0);
500}
501
502TEST_CASE("BufferBase: Cached write unmarked") {
503 RasterizerInterface rasterizer;
504 BufferBase buffer(rasterizer, c, WORD);
505 buffer.UnmarkRegionAsCpuModified(c, WORD);
506 buffer.CachedCpuWrite(c + PAGE, PAGE);
507 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
508 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
509 buffer.FlushCachedWrites();
510 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
511 buffer.MarkRegionAsCpuModified(c, WORD);
512 REQUIRE(rasterizer.Count() == 0);
513}
514
515TEST_CASE("BufferBase: Cached write iterated") {
516 RasterizerInterface rasterizer;
517 BufferBase buffer(rasterizer, c, WORD);
518 buffer.UnmarkRegionAsCpuModified(c, WORD);
519 buffer.CachedCpuWrite(c + PAGE, PAGE);
520 int num = 0;
521 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
522 REQUIRE(num == 0);
523 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
524 buffer.FlushCachedWrites();
525 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
526 buffer.MarkRegionAsCpuModified(c, WORD);
527 REQUIRE(rasterizer.Count() == 0);
528}
529
530TEST_CASE("BufferBase: Cached write downloads") {
531 RasterizerInterface rasterizer;
532 BufferBase buffer(rasterizer, c, WORD);
533 buffer.UnmarkRegionAsCpuModified(c, WORD);
534 REQUIRE(rasterizer.Count() == 64);
535 buffer.CachedCpuWrite(c + PAGE, PAGE);
536 REQUIRE(rasterizer.Count() == 63);
537 buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
538 int num = 0;
539 buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
544 buffer.FlushCachedWrites();
545 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
547 buffer.MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index dd4c29ed3..9b931976a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
2 2
3add_library(video_core STATIC 3add_library(video_core STATIC
4 buffer_cache/buffer_base.h 4 buffer_cache/buffer_base.h
5 buffer_cache/buffer_block.h 5 buffer_cache/buffer_cache.cpp
6 buffer_cache/buffer_cache.h 6 buffer_cache/buffer_cache.h
7 buffer_cache/map_interval.cpp
8 buffer_cache/map_interval.h
9 cdma_pusher.cpp 7 cdma_pusher.cpp
10 cdma_pusher.h 8 cdma_pusher.h
11 command_classes/codecs/codec.cpp 9 command_classes/codecs/codec.cpp
@@ -152,8 +150,6 @@ add_library(video_core STATIC
152 renderer_vulkan/vk_staging_buffer_pool.h 150 renderer_vulkan/vk_staging_buffer_pool.h
153 renderer_vulkan/vk_state_tracker.cpp 151 renderer_vulkan/vk_state_tracker.cpp
154 renderer_vulkan/vk_state_tracker.h 152 renderer_vulkan/vk_state_tracker.h
155 renderer_vulkan/vk_stream_buffer.cpp
156 renderer_vulkan/vk_stream_buffer.h
157 renderer_vulkan/vk_swapchain.cpp 153 renderer_vulkan/vk_swapchain.cpp
158 renderer_vulkan/vk_swapchain.h 154 renderer_vulkan/vk_swapchain.h
159 renderer_vulkan/vk_texture_cache.cpp 155 renderer_vulkan/vk_texture_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index ee8602ce9..0c00ae280 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {
19 19
20enum class BufferFlagBits { 20enum class BufferFlagBits {
21 Picked = 1 << 0, 21 Picked = 1 << 0,
22 CachedWrites = 1 << 1,
22}; 23};
23DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) 24DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
24 25
@@ -40,7 +41,7 @@ class BufferBase {
40 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; 41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
41 42
42 /// Vector tracking modified pages tightly packed with small vector optimization 43 /// Vector tracking modified pages tightly packed with small vector optimization
43 union WrittenWords { 44 union WordsArray {
44 /// Returns the pointer to the words state 45 /// Returns the pointer to the words state
45 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { 46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
46 return is_short ? &stack : heap; 47 return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
55 u64* heap; ///< Not-small buffers pointer to the storage 56 u64* heap; ///< Not-small buffers pointer to the storage
56 }; 57 };
57 58
58 struct GpuCpuWords { 59 struct Words {
59 explicit GpuCpuWords() = default; 60 explicit Words() = default;
60 explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { 61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
61 if (IsShort()) { 62 if (IsShort()) {
62 cpu.stack = ~u64{0}; 63 cpu.stack = ~u64{0};
63 gpu.stack = 0; 64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
64 } else { 67 } else {
65 // Share allocation between CPU and GPU pages and set their default values 68 // Share allocation between CPU and GPU pages and set their default values
66 const size_t num_words = NumWords(); 69 const size_t num_words = NumWords();
67 u64* const alloc = new u64[num_words * 2]; 70 u64* const alloc = new u64[num_words * 4];
68 cpu.heap = alloc; 71 cpu.heap = alloc;
69 gpu.heap = alloc + num_words; 72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
70 std::fill_n(cpu.heap, num_words, ~u64{0}); 75 std::fill_n(cpu.heap, num_words, ~u64{0});
71 std::fill_n(gpu.heap, num_words, 0); 76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
72 } 79 }
73 // Clean up tailing bits 80 // Clean up tailing bits
74 const u64 last_local_page = 81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
75 Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); 82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
76 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; 83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
77 u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; 84 const u64 last_word = (~u64{0} << shift) >> shift;
78 last_word = (last_word << shift) >> shift; 85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
79 } 87 }
80 88
81 ~GpuCpuWords() { 89 ~Words() {
82 Release(); 90 Release();
83 } 91 }
84 92
85 GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { 93 Words& operator=(Words&& rhs) noexcept {
86 Release(); 94 Release();
87 size_bytes = rhs.size_bytes; 95 size_bytes = rhs.size_bytes;
88 cpu = rhs.cpu; 96 cpu = rhs.cpu;
89 gpu = rhs.gpu; 97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
90 rhs.cpu.heap = nullptr; 100 rhs.cpu.heap = nullptr;
91 return *this; 101 return *this;
92 } 102 }
93 103
94 GpuCpuWords(GpuCpuWords&& rhs) noexcept 104 Words(Words&& rhs) noexcept
95 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { 105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
96 rhs.cpu.heap = nullptr; 107 rhs.cpu.heap = nullptr;
97 } 108 }
98 109
99 GpuCpuWords& operator=(const GpuCpuWords&) = delete; 110 Words& operator=(const Words&) = delete;
100 GpuCpuWords(const GpuCpuWords&) = delete; 111 Words(const Words&) = delete;
101 112
102 /// Returns true when the buffer fits in the small vector optimization 113 /// Returns true when the buffer fits in the small vector optimization
103 [[nodiscard]] bool IsShort() const noexcept { 114 [[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
118 } 129 }
119 130
120 u64 size_bytes = 0; 131 u64 size_bytes = 0;
121 WrittenWords cpu; 132 WordsArray cpu;
122 WrittenWords gpu; 133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
123 }; 143 };
124 144
125public: 145public:
@@ -132,68 +152,93 @@ public:
132 BufferBase& operator=(const BufferBase&) = delete; 152 BufferBase& operator=(const BufferBase&) = delete;
133 BufferBase(const BufferBase&) = delete; 153 BufferBase(const BufferBase&) = delete;
134 154
155 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default;
157
135 /// Returns the inclusive CPU modified range in a begin end pair 158 /// Returns the inclusive CPU modified range in a begin end pair
136 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, 159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
137 u64 query_size) const noexcept { 160 u64 query_size) const noexcept {
138 const u64 offset = query_cpu_addr - cpu_addr; 161 const u64 offset = query_cpu_addr - cpu_addr;
139 return ModifiedRegion<false>(offset, query_size); 162 return ModifiedRegion<Type::CPU>(offset, query_size);
140 } 163 }
141 164
142 /// Returns the inclusive GPU modified range in a begin end pair 165 /// Returns the inclusive GPU modified range in a begin end pair
143 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, 166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
144 u64 query_size) const noexcept { 167 u64 query_size) const noexcept {
145 const u64 offset = query_cpu_addr - cpu_addr; 168 const u64 offset = query_cpu_addr - cpu_addr;
146 return ModifiedRegion<true>(offset, query_size); 169 return ModifiedRegion<Type::GPU>(offset, query_size);
147 } 170 }
148 171
149 /// Returns true if a region has been modified from the CPU 172 /// Returns true if a region has been modified from the CPU
150 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
151 const u64 offset = query_cpu_addr - cpu_addr; 174 const u64 offset = query_cpu_addr - cpu_addr;
152 return IsRegionModified<false>(offset, query_size); 175 return IsRegionModified<Type::CPU>(offset, query_size);
153 } 176 }
154 177
155 /// Returns true if a region has been modified from the GPU 178 /// Returns true if a region has been modified from the GPU
156 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
157 const u64 offset = query_cpu_addr - cpu_addr; 180 const u64 offset = query_cpu_addr - cpu_addr;
158 return IsRegionModified<true>(offset, query_size); 181 return IsRegionModified<Type::GPU>(offset, query_size);
159 } 182 }
160 183
161 /// Mark region as CPU modified, notifying the rasterizer about this change 184 /// Mark region as CPU modified, notifying the rasterizer about this change
162 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
163 ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); 186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
164 } 187 }
165 188
166 /// Unmark region as CPU modified, notifying the rasterizer about this change 189 /// Unmark region as CPU modified, notifying the rasterizer about this change
167 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
168 ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); 191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
169 } 192 }
170 193
171 /// Mark region as modified from the host GPU 194 /// Mark region as modified from the host GPU
172 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
173 ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); 196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
174 } 197 }
175 198
176 /// Unmark region as modified from the host GPU 199 /// Unmark region as modified from the host GPU
177 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
178 ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); 201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU
205 /// but don't mark it as modified until FlusHCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes, and notify the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 const u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 }
179 } 224 }
180 225
181 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified 226 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
182 template <typename Func> 227 template <typename Func>
183 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { 228 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
184 ForEachModifiedRange<false, true>(query_cpu_range, size, func); 229 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
185 } 230 }
186 231
187 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 232 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
188 template <typename Func> 233 template <typename Func>
189 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { 234 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
190 ForEachModifiedRange<true, false>(query_cpu_range, size, func); 235 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
191 } 236 }
192 237
193 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 238 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
194 template <typename Func> 239 template <typename Func>
195 void ForEachDownloadRange(Func&& func) { 240 void ForEachDownloadRange(Func&& func) {
196 ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); 241 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
197 } 242 }
198 243
199 /// Mark buffer as picked 244 /// Mark buffer as picked
@@ -206,6 +251,16 @@ public:
206 flags &= ~BufferFlagBits::Picked; 251 flags &= ~BufferFlagBits::Picked;
207 } 252 }
208 253
254 /// Increases the likeliness of this being a stream buffer
255 void IncreaseStreamScore(int score) noexcept {
256 stream_score += score;
257 }
258
259 /// Returns the likeliness of this being a stream buffer
260 [[nodiscard]] int StreamScore() const noexcept {
261 return stream_score;
262 }
263
209 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer 264 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
210 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { 265 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
211 return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); 266 return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -216,6 +271,11 @@ public:
216 return True(flags & BufferFlagBits::Picked); 271 return True(flags & BufferFlagBits::Picked);
217 } 272 }
218 273
274 /// Returns true when the buffer has pending cached writes
275 [[nodiscard]] bool HasCachedWrites() const noexcept {
276 return True(flags & BufferFlagBits::CachedWrites);
277 }
278
219 /// Returns the base CPU address of the buffer 279 /// Returns the base CPU address of the buffer
220 [[nodiscard]] VAddr CpuAddr() const noexcept { 280 [[nodiscard]] VAddr CpuAddr() const noexcept {
221 return cpu_addr; 281 return cpu_addr;
@@ -233,26 +293,48 @@ public:
233 } 293 }
234 294
235private: 295private:
296 template <Type type>
297 u64* Array() noexcept {
298 if constexpr (type == Type::CPU) {
299 return words.cpu.Pointer(IsShort());
300 } else if constexpr (type == Type::GPU) {
301 return words.gpu.Pointer(IsShort());
302 } else if constexpr (type == Type::CachedCPU) {
303 return words.cached_cpu.Pointer(IsShort());
304 } else if constexpr (type == Type::Untracked) {
305 return words.untracked.Pointer(IsShort());
306 }
307 }
308
309 template <Type type>
310 const u64* Array() const noexcept {
311 if constexpr (type == Type::CPU) {
312 return words.cpu.Pointer(IsShort());
313 } else if constexpr (type == Type::GPU) {
314 return words.gpu.Pointer(IsShort());
315 } else if constexpr (type == Type::CachedCPU) {
316 return words.cached_cpu.Pointer(IsShort());
317 } else if constexpr (type == Type::Untracked) {
318 return words.untracked.Pointer(IsShort());
319 }
320 }
321
236 /** 322 /**
237 * Change the state of a range of pages 323 * Change the state of a range of pages
238 * 324 *
239 * @param written_words Pages to be marked or unmarked as modified
240 * @param dirty_addr Base address to mark or unmark as modified 325 * @param dirty_addr Base address to mark or unmark as modified
241 * @param size Size in bytes to mark or unmark as modified 326 * @param size Size in bytes to mark or unmark as modified
242 *
243 * @tparam enable True when the bits will be set to one, false for zero
244 * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
245 */ 327 */
246 template <bool enable, bool notify_rasterizer> 328 template <Type type, bool enable>
247 void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, 329 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
248 s64 size) noexcept(!notify_rasterizer) {
249 const s64 difference = dirty_addr - cpu_addr; 330 const s64 difference = dirty_addr - cpu_addr;
250 const u64 offset = std::max<s64>(difference, 0); 331 const u64 offset = std::max<s64>(difference, 0);
251 size += std::min<s64>(difference, 0); 332 size += std::min<s64>(difference, 0);
252 if (offset >= SizeBytes() || size < 0) { 333 if (offset >= SizeBytes() || size < 0) {
253 return; 334 return;
254 } 335 }
255 u64* const state_words = written_words.Pointer(IsShort()); 336 u64* const untracked_words = Array<Type::Untracked>();
337 u64* const state_words = Array<type>();
256 const u64 offset_end = std::min(offset + size, SizeBytes()); 338 const u64 offset_end = std::min(offset + size, SizeBytes());
257 const u64 begin_page_index = offset / BYTES_PER_PAGE; 339 const u64 begin_page_index = offset / BYTES_PER_PAGE;
258 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; 340 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +350,19 @@ private:
268 u64 bits = ~u64{0}; 350 u64 bits = ~u64{0};
269 bits = (bits >> right_offset) << right_offset; 351 bits = (bits >> right_offset) << right_offset;
270 bits = (bits << left_offset) >> left_offset; 352 bits = (bits << left_offset) >> left_offset;
271 if constexpr (notify_rasterizer) { 353 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
272 NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); 354 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
273 } 355 }
274 if constexpr (enable) { 356 if constexpr (enable) {
275 state_words[word_index] |= bits; 357 state_words[word_index] |= bits;
358 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
359 untracked_words[word_index] |= bits;
360 }
276 } else { 361 } else {
277 state_words[word_index] &= ~bits; 362 state_words[word_index] &= ~bits;
363 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
364 untracked_words[word_index] &= ~bits;
365 }
278 } 366 }
279 page_index = 0; 367 page_index = 0;
280 ++word_index; 368 ++word_index;
@@ -291,7 +379,7 @@ private:
291 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 379 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
292 */ 380 */
293 template <bool add_to_rasterizer> 381 template <bool add_to_rasterizer>
294 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { 382 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
295 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 383 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
296 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 384 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
297 while (changed_bits != 0) { 385 while (changed_bits != 0) {
@@ -315,21 +403,20 @@ private:
315 * @param query_cpu_range Base CPU address to loop over 403 * @param query_cpu_range Base CPU address to loop over
316 * @param size Size in bytes of the CPU range to loop over 404 * @param size Size in bytes of the CPU range to loop over
317 * @param func Function to call for each turned off region 405 * @param func Function to call for each turned off region
318 *
319 * @tparam gpu True for host GPU pages, false for CPU pages
320 * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
321 */ 406 */
322 template <bool gpu, bool notify_rasterizer, typename Func> 407 template <Type type, typename Func>
323 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { 408 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
409 static_assert(type != Type::Untracked);
410
324 const s64 difference = query_cpu_range - cpu_addr; 411 const s64 difference = query_cpu_range - cpu_addr;
325 const u64 query_begin = std::max<s64>(difference, 0); 412 const u64 query_begin = std::max<s64>(difference, 0);
326 size += std::min<s64>(difference, 0); 413 size += std::min<s64>(difference, 0);
327 if (query_begin >= SizeBytes() || size < 0) { 414 if (query_begin >= SizeBytes() || size < 0) {
328 return; 415 return;
329 } 416 }
330 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 417 u64* const untracked_words = Array<Type::Untracked>();
418 u64* const state_words = Array<type>();
331 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); 419 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
332 u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
333 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; 420 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
334 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); 421 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
335 422
@@ -345,7 +432,8 @@ private:
345 const u64 word_index_end = std::distance(state_words, last_modified_word); 432 const u64 word_index_end = std::distance(state_words, last_modified_word);
346 433
347 const unsigned local_page_begin = std::countr_zero(*first_modified_word); 434 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
348 const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); 435 const unsigned local_page_end =
436 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
349 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; 437 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
350 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; 438 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
351 const u64 query_page_begin = query_begin / BYTES_PER_PAGE; 439 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
@@ -371,11 +459,13 @@ private:
371 const u64 current_word = state_words[word_index] & bits; 459 const u64 current_word = state_words[word_index] & bits;
372 state_words[word_index] &= ~bits; 460 state_words[word_index] &= ~bits;
373 461
374 // Exclude CPU modified pages when visiting GPU pages 462 if constexpr (type == Type::CPU) {
375 const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); 463 const u64 current_bits = untracked_words[word_index] & bits;
376 if constexpr (notify_rasterizer) { 464 untracked_words[word_index] &= ~bits;
377 NotifyRasterizer<true>(word_index, word, ~u64{0}); 465 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
378 } 466 }
467 // Exclude CPU modified pages when visiting GPU pages
468 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
379 u64 page = page_begin; 469 u64 page = page_begin;
380 page_begin = 0; 470 page_begin = 0;
381 471
@@ -416,17 +506,20 @@ private:
416 * @param offset Offset in bytes from the start of the buffer 506 * @param offset Offset in bytes from the start of the buffer
417 * @param size Size in bytes of the region to query for modifications 507 * @param size Size in bytes of the region to query for modifications
418 */ 508 */
419 template <bool gpu> 509 template <Type type>
420 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { 510 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
421 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 511 static_assert(type != Type::Untracked);
422 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 512
513 const u64* const untracked_words = Array<Type::Untracked>();
514 const u64* const state_words = Array<type>();
423 const u64 num_query_words = size / BYTES_PER_WORD + 1; 515 const u64 num_query_words = size / BYTES_PER_WORD + 1;
424 const u64 word_begin = offset / BYTES_PER_WORD; 516 const u64 word_begin = offset / BYTES_PER_WORD;
425 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 517 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
426 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); 518 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
427 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; 519 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
428 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { 520 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
429 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 521 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
522 const u64 word = state_words[word_index] & ~off_word;
430 if (word == 0) { 523 if (word == 0) {
431 continue; 524 continue;
432 } 525 }
@@ -445,13 +538,13 @@ private:
445 * 538 *
446 * @param offset Offset in bytes from the start of the buffer 539 * @param offset Offset in bytes from the start of the buffer
447 * @param size Size in bytes of the region to query for modifications 540 * @param size Size in bytes of the region to query for modifications
448 *
449 * @tparam gpu True to query GPU modified pages, false for CPU pages
450 */ 541 */
451 template <bool gpu> 542 template <Type type>
452 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { 543 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
453 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 544 static_assert(type != Type::Untracked);
454 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 545
546 const u64* const untracked_words = Array<Type::Untracked>();
547 const u64* const state_words = Array<type>();
455 const u64 num_query_words = size / BYTES_PER_WORD + 1; 548 const u64 num_query_words = size / BYTES_PER_WORD + 1;
456 const u64 word_begin = offset / BYTES_PER_WORD; 549 const u64 word_begin = offset / BYTES_PER_WORD;
457 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 550 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +553,8 @@ private:
460 u64 begin = std::numeric_limits<u64>::max(); 553 u64 begin = std::numeric_limits<u64>::max();
461 u64 end = 0; 554 u64 end = 0;
462 for (u64 word_index = word_begin; word_index < word_end; ++word_index) { 555 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
463 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 556 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
557 const u64 word = state_words[word_index] & ~off_word;
464 if (word == 0) { 558 if (word == 0) {
465 continue; 559 continue;
466 } 560 }
@@ -488,8 +582,9 @@ private:
488 582
489 RasterizerInterface* rasterizer = nullptr; 583 RasterizerInterface* rasterizer = nullptr;
490 VAddr cpu_addr = 0; 584 VAddr cpu_addr = 0;
491 GpuCpuWords words; 585 Words words;
492 BufferFlagBits flags{}; 586 BufferFlagBits flags{};
587 int stream_score = 0;
493}; 588};
494 589
495} // namespace VideoCommon 590} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e9306194a..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,62 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11class BufferBlock {
12public:
13 [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
14 return (cpu_addr < end) && (cpu_addr_end > start);
15 }
16
17 [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
18 return cpu_addr <= other_start && other_end <= cpu_addr_end;
19 }
20
21 [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
22 return static_cast<std::size_t>(in_addr - cpu_addr);
23 }
24
25 [[nodiscard]] VAddr CpuAddr() const {
26 return cpu_addr;
27 }
28
29 [[nodiscard]] VAddr CpuAddrEnd() const {
30 return cpu_addr_end;
31 }
32
33 void SetCpuAddr(VAddr new_addr) {
34 cpu_addr = new_addr;
35 cpu_addr_end = new_addr + size;
36 }
37
38 [[nodiscard]] std::size_t Size() const {
39 return size;
40 }
41
42 [[nodiscard]] u64 Epoch() const {
43 return epoch;
44 }
45
46 void SetEpoch(u64 new_epoch) {
47 epoch = new_epoch;
48 }
49
50protected:
51 explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
52 SetCpuAddr(cpu_addr_);
53 }
54
55private:
56 VAddr cpu_addr{};
57 VAddr cpu_addr_end{};
58 std::size_t size{};
59 u64 epoch{};
60};
61
62} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/microprofile.h"
6
7namespace VideoCommon {
8
9MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
10MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
11MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
12
13} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83b9ee871..2a6844ab1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,591 +4,1289 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <list> 7#include <algorithm>
8#include <array>
9#include <deque>
8#include <memory> 10#include <memory>
9#include <mutex> 11#include <mutex>
12#include <span>
10#include <unordered_map> 13#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector> 14#include <vector>
14 15
15#include <boost/container/small_vector.hpp> 16#include <boost/container/small_vector.hpp>
16#include <boost/icl/interval_set.hpp>
17#include <boost/intrusive/set.hpp>
18 17
19#include "common/alignment.h"
20#include "common/assert.h"
21#include "common/common_types.h" 18#include "common/common_types.h"
22#include "common/logging/log.h" 19#include "common/div_ceil.h"
23#include "core/core.h" 20#include "common/microprofile.h"
21#include "common/scope_exit.h"
24#include "core/memory.h" 22#include "core/memory.h"
25#include "core/settings.h" 23#include "core/settings.h"
26#include "video_core/buffer_cache/buffer_block.h" 24#include "video_core/buffer_cache/buffer_base.h"
27#include "video_core/buffer_cache/map_interval.h" 25#include "video_core/delayed_destruction_ring.h"
26#include "video_core/dirty_flags.h"
27#include "video_core/engines/kepler_compute.h"
28#include "video_core/engines/maxwell_3d.h"
28#include "video_core/memory_manager.h" 29#include "video_core/memory_manager.h"
29#include "video_core/rasterizer_interface.h" 30#include "video_core/rasterizer_interface.h"
31#include "video_core/texture_cache/slot_vector.h"
32#include "video_core/texture_cache/types.h"
30 33
31namespace VideoCommon { 34namespace VideoCommon {
32 35
33template <typename Buffer, typename BufferType, typename StreamBuffer> 36MICROPROFILE_DECLARE(GPU_PrepareBuffers);
37MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
38MICROPROFILE_DECLARE(GPU_DownloadMemory);
39
40using BufferId = SlotId;
41
42constexpr u32 NUM_VERTEX_BUFFERS = 32;
43constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
44constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
45constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
46constexpr u32 NUM_STORAGE_BUFFERS = 16;
47constexpr u32 NUM_STAGES = 5;
48
49template <typename P>
34class BufferCache { 50class BufferCache {
35 using IntervalSet = boost::icl::interval_set<VAddr>; 51 // Page size for caching purposes.
36 using IntervalType = typename IntervalSet::interval_type; 52 // This is unrelated to the CPU page size and it can be changed as it seems optimal.
37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; 53 static constexpr u32 PAGE_BITS = 16;
54 static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
55
56 static constexpr bool IS_OPENGL = P::IS_OPENGL;
57 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
58 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
59 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
60 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
61 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
62 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
63 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
64
65 static constexpr BufferId NULL_BUFFER_ID{0};
66
67 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
68
69 using Runtime = typename P::Runtime;
70 using Buffer = typename P::Buffer;
71
72 struct Empty {};
73
74 struct OverlapResult {
75 std::vector<BufferId> ids;
76 VAddr begin;
77 VAddr end;
78 bool has_stream_leap = false;
79 };
38 80
39 static constexpr u64 WRITE_PAGE_BIT = 11; 81 struct Binding {
40 static constexpr u64 BLOCK_PAGE_BITS = 21; 82 VAddr cpu_addr{};
41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; 83 u32 size{};
84 BufferId buffer_id;
85 };
42 86
43public: 87 static constexpr Binding NULL_BINDING{
44 struct BufferInfo { 88 .cpu_addr = 0,
45 BufferType handle; 89 .size = 0,
46 u64 offset; 90 .buffer_id = NULL_BUFFER_ID,
47 u64 address;
48 }; 91 };
49 92
50 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 93public:
51 bool is_written = false, bool use_fast_cbuf = false) { 94 static constexpr u32 SKIP_CACHE_SIZE = 4096;
52 std::lock_guard lock{mutex};
53 95
54 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 96 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
55 if (!cpu_addr) { 97 Tegra::Engines::Maxwell3D& maxwell3d_,
56 return GetEmptyBuffer(size); 98 Tegra::Engines::KeplerCompute& kepler_compute_,
57 } 99 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
100 Runtime& runtime_);
58 101
59 // Cache management is a big overhead, so only cache entries with a given size. 102 void TickFrame();
60 // TODO: Figure out which size is the best for given games.
61 constexpr std::size_t max_stream_size = 0x800;
62 if (use_fast_cbuf || size < max_stream_size) {
63 if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
64 const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
65 if (use_fast_cbuf) {
66 u8* dest;
67 if (is_granular) {
68 dest = gpu_memory.GetPointer(gpu_addr);
69 } else {
70 staging_buffer.resize(size);
71 dest = staging_buffer.data();
72 gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
73 }
74 return ConstBufferUpload(dest, size);
75 }
76 if (is_granular) {
77 u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
78 return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
79 std::memcpy(dest, host_ptr, size);
80 });
81 } else {
82 return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
83 gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
84 });
85 }
86 }
87 }
88 103
89 Buffer* const block = GetBlock(*cpu_addr, size); 104 void WriteMemory(VAddr cpu_addr, u64 size);
90 MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
91 if (!map) {
92 return GetEmptyBuffer(size);
93 }
94 if (is_written) {
95 map->MarkAsModified(true, GetModifiedTicks());
96 if (Settings::IsGPULevelHigh() &&
97 Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
98 MarkForAsyncFlush(map);
99 }
100 if (!map->is_written) {
101 map->is_written = true;
102 MarkRegionAsWritten(map->start, map->end - 1);
103 }
104 }
105 105
106 return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; 106 void CachedWriteMemory(VAddr cpu_addr, u64 size);
107 }
108 107
109 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. 108 void DownloadMemory(VAddr cpu_addr, u64 size);
110 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
111 std::size_t alignment = 4) {
112 std::lock_guard lock{mutex};
113 return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
114 std::memcpy(dest, raw_pointer, size);
115 });
116 }
117 109
118 /// Prepares the buffer cache for data uploading 110 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
119 /// @param max_size Maximum number of bytes that will be uploaded
120 /// @return True when a stream buffer invalidation was required, false otherwise
121 void Map(std::size_t max_size) {
122 std::lock_guard lock{mutex};
123 111
124 std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); 112 void UpdateGraphicsBuffers(bool is_indexed);
125 buffer_offset = buffer_offset_base;
126 }
127 113
128 /// Finishes the upload stream 114 void UpdateComputeBuffers();
129 void Unmap() {
130 std::lock_guard lock{mutex};
131 stream_buffer.Unmap(buffer_offset - buffer_offset_base);
132 }
133 115
134 /// Function called at the end of each frame, inteded for deferred operations 116 void BindHostGeometryBuffers(bool is_indexed);
135 void TickFrame() {
136 ++epoch;
137 117
138 while (!pending_destruction.empty()) { 118 void BindHostStageBuffers(size_t stage);
139 // Delay at least 4 frames before destruction.
140 // This is due to triple buffering happening on some drivers.
141 static constexpr u64 epochs_to_destroy = 5;
142 if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
143 break;
144 }
145 pending_destruction.pop();
146 }
147 }
148 119
149 /// Write any cached resources overlapping the specified region back to memory 120 void BindHostComputeBuffers();
150 void FlushRegion(VAddr addr, std::size_t size) {
151 std::lock_guard lock{mutex};
152 121
153 VectorMapInterval objects = GetMapsInRange(addr, size); 122 void SetEnabledUniformBuffers(size_t stage, u32 enabled);
154 std::sort(objects.begin(), objects.end(),
155 [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
156 for (MapInterval* object : objects) {
157 if (object->is_modified && object->is_registered) {
158 mutex.unlock();
159 FlushMap(object);
160 mutex.lock();
161 }
162 }
163 }
164 123
165 bool MustFlushRegion(VAddr addr, std::size_t size) { 124 void SetEnabledComputeUniformBuffers(u32 enabled);
166 std::lock_guard lock{mutex};
167 125
168 const VectorMapInterval objects = GetMapsInRange(addr, size); 126 void UnbindGraphicsStorageBuffers(size_t stage);
169 return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
170 return map->is_modified && map->is_registered;
171 });
172 }
173 127
174 /// Mark the specified region as being invalidated 128 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
175 void InvalidateRegion(VAddr addr, u64 size) { 129 bool is_written);
176 std::lock_guard lock{mutex};
177 130
178 for (auto& object : GetMapsInRange(addr, size)) { 131 void UnbindComputeStorageBuffers();
179 if (object->is_registered) {
180 Unregister(object);
181 }
182 }
183 }
184 132
185 void OnCPUWrite(VAddr addr, std::size_t size) { 133 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
186 std::lock_guard lock{mutex}; 134 bool is_written);
187 135
188 for (MapInterval* object : GetMapsInRange(addr, size)) { 136 void FlushCachedWrites();
189 if (object->is_memory_marked && object->is_registered) {
190 UnmarkMemory(object);
191 object->is_sync_pending = true;
192 marked_for_unregister.emplace_back(object);
193 }
194 }
195 }
196 137
197 void SyncGuestHost() { 138 /// Return true when there are uncommitted buffers to be downloaded
198 std::lock_guard lock{mutex}; 139 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
199 140
200 for (auto& object : marked_for_unregister) { 141 /// Return true when the caller should wait for async downloads
201 if (object->is_registered) { 142 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
202 object->is_sync_pending = false; 143
203 Unregister(object); 144 /// Commit asynchronous downloads
204 } 145 void CommitAsyncFlushes();
146
147 /// Pop asynchronous downloads
148 void PopAsyncFlushes();
149
150 /// Return true when a CPU region is modified from the GPU
151 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
152
153 std::mutex mutex;
154
155private:
156 template <typename Func>
157 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
158 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
159 const int disabled_bits = std::countr_zero(enabled_mask);
160 index += disabled_bits;
161 enabled_mask >>= disabled_bits;
162 func(index);
205 } 163 }
206 marked_for_unregister.clear();
207 } 164 }
208 165
209 void CommitAsyncFlushes() { 166 template <typename Func>
210 if (uncommitted_flushes) { 167 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
211 auto commit_list = std::make_shared<std::list<MapInterval*>>(); 168 const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
212 for (MapInterval* map : *uncommitted_flushes) { 169 for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
213 if (map->is_registered && map->is_modified) { 170 const BufferId buffer_id = page_table[page];
214 // TODO(Blinkhawk): Implement backend asynchronous flushing 171 if (!buffer_id) {
215 // AsyncFlushMap(map) 172 ++page;
216 commit_list->push_back(map); 173 continue;
217 }
218 }
219 if (!commit_list->empty()) {
220 committed_flushes.push_back(commit_list);
221 } else {
222 committed_flushes.emplace_back();
223 } 174 }
224 } else { 175 Buffer& buffer = slot_buffers[buffer_id];
225 committed_flushes.emplace_back(); 176 func(buffer_id, buffer);
177
178 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
179 page = Common::DivCeil(end_addr, PAGE_SIZE);
226 } 180 }
227 uncommitted_flushes.reset();
228 } 181 }
229 182
230 bool ShouldWaitAsyncFlushes() const { 183 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
231 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 184 return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
185 ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
232 } 186 }
233 187
234 bool HasUncommittedFlushes() const { 188 void BindHostIndexBuffer();
235 return uncommitted_flushes != nullptr;
236 }
237 189
238 void PopAsyncFlushes() { 190 void BindHostVertexBuffers();
239 if (committed_flushes.empty()) {
240 return;
241 }
242 auto& flush_list = committed_flushes.front();
243 if (!flush_list) {
244 committed_flushes.pop_front();
245 return;
246 }
247 for (MapInterval* map : *flush_list) {
248 if (map->is_registered) {
249 // TODO(Blinkhawk): Replace this for reading the asynchronous flush
250 FlushMap(map);
251 }
252 }
253 committed_flushes.pop_front();
254 }
255 191
256 virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; 192 void BindHostGraphicsUniformBuffers(size_t stage);
257 193
258protected: 194 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
259 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
260 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
261 StreamBuffer& stream_buffer_)
262 : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
263 stream_buffer{stream_buffer_} {}
264 195
265 ~BufferCache() = default; 196 void BindHostGraphicsStorageBuffers(size_t stage);
266 197
267 virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; 198 void BindHostTransformFeedbackBuffers();
268 199
269 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { 200 void BindHostComputeUniformBuffers();
270 return {};
271 }
272 201
273 /// Register an object into the cache 202 void BindHostComputeStorageBuffers();
274 MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
275 const VAddr cpu_addr = new_map.start;
276 if (!cpu_addr) {
277 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
278 new_map.gpu_addr);
279 return nullptr;
280 }
281 const std::size_t size = new_map.end - new_map.start;
282 new_map.is_registered = true;
283 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
284 new_map.is_memory_marked = true;
285 if (inherit_written) {
286 MarkRegionAsWritten(new_map.start, new_map.end - 1);
287 new_map.is_written = true;
288 }
289 MapInterval* const storage = mapped_addresses_allocator.Allocate();
290 *storage = new_map;
291 mapped_addresses.insert(*storage);
292 return storage;
293 }
294 203
295 void UnmarkMemory(MapInterval* map) { 204 void DoUpdateGraphicsBuffers(bool is_indexed);
296 if (!map->is_memory_marked) { 205
297 return; 206 void DoUpdateComputeBuffers();
298 } 207
299 const std::size_t size = map->end - map->start; 208 void UpdateIndexBuffer();
300 rasterizer.UpdatePagesCachedCount(map->start, size, -1); 209
301 map->is_memory_marked = false; 210 void UpdateVertexBuffers();
302 } 211
303 212 void UpdateVertexBuffer(u32 index);
304 /// Unregisters an object from the cache 213
305 void Unregister(MapInterval* map) { 214 void UpdateUniformBuffers(size_t stage);
306 UnmarkMemory(map); 215
307 map->is_registered = false; 216 void UpdateStorageBuffers(size_t stage);
308 if (map->is_sync_pending) { 217
309 map->is_sync_pending = false; 218 void UpdateTransformFeedbackBuffers();
310 marked_for_unregister.remove(map); 219
220 void UpdateTransformFeedbackBuffer(u32 index);
221
222 void UpdateComputeUniformBuffers();
223
224 void UpdateComputeStorageBuffers();
225
226 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
227
228 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
229
230 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
231
232 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
233
234 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
235
236 void Register(BufferId buffer_id);
237
238 void Unregister(BufferId buffer_id);
239
240 template <bool insert>
241 void ChangeRegister(BufferId buffer_id);
242
243 void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
244
245 void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
246
247 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
248 std::span<BufferCopy> copies);
249
250 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
251 std::span<const BufferCopy> copies);
252
253 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
254
255 void DeleteBuffer(BufferId buffer_id);
256
257 void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
258
259 void NotifyBufferDeletion();
260
261 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
262
263 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
264
265 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
266
267 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
268
269 VideoCore::RasterizerInterface& rasterizer;
270 Tegra::Engines::Maxwell3D& maxwell3d;
271 Tegra::Engines::KeplerCompute& kepler_compute;
272 Tegra::MemoryManager& gpu_memory;
273 Core::Memory::Memory& cpu_memory;
274 Runtime& runtime;
275
276 SlotVector<Buffer> slot_buffers;
277 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
278
279 u32 last_index_count = 0;
280
281 Binding index_buffer;
282 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
283 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
284 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
285 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
286
287 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
288 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
289
290 std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
291 u32 enabled_compute_uniform_buffers = 0;
292
293 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
294 std::array<u32, NUM_STAGES> written_storage_buffers{};
295 u32 enabled_compute_storage_buffers = 0;
296 u32 written_compute_storage_buffers = 0;
297
298 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
299
300 bool has_deleted_buffers = false;
301
302 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
303 dirty_uniform_buffers{};
304
305 std::vector<BufferId> cached_write_buffer_ids;
306
307 // TODO: This data structure is not optimal and it should be reworked
308 std::vector<BufferId> uncommitted_downloads;
309 std::deque<std::vector<BufferId>> committed_downloads;
310
311 size_t immediate_buffer_capacity = 0;
312 std::unique_ptr<u8[]> immediate_buffer_alloc;
313
314 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
315};
316
317template <class P>
318BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
319 Tegra::Engines::Maxwell3D& maxwell3d_,
320 Tegra::Engines::KeplerCompute& kepler_compute_,
321 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
322 Runtime& runtime_)
323 : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
324 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
325 // Ensure the first slot is used for the null buffer
326 void(slot_buffers.insert(runtime, NullBufferParams{}));
327}
328
329template <class P>
330void BufferCache<P>::TickFrame() {
331 delayed_destruction_ring.Tick();
332}
333
334template <class P>
335void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
336 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
337 buffer.MarkRegionAsCpuModified(cpu_addr, size);
338 });
339}
340
341template <class P>
342void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
343 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
344 if (!buffer.HasCachedWrites()) {
345 cached_write_buffer_ids.push_back(buffer_id);
311 } 346 }
312 if (map->is_written) { 347 buffer.CachedCpuWrite(cpu_addr, size);
313 UnmarkRegionAsWritten(map->start, map->end - 1); 348 });
349}
350
351template <class P>
352void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
353 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
354 boost::container::small_vector<BufferCopy, 1> copies;
355 u64 total_size_bytes = 0;
356 u64 largest_copy = 0;
357 buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
358 copies.push_back(BufferCopy{
359 .src_offset = range_offset,
360 .dst_offset = total_size_bytes,
361 .size = range_size,
362 });
363 total_size_bytes += range_size;
364 largest_copy = std::max(largest_copy, range_size);
365 });
366 if (total_size_bytes == 0) {
367 return;
314 } 368 }
315 const auto it = mapped_addresses.find(*map); 369 MICROPROFILE_SCOPE(GPU_DownloadMemory);
316 ASSERT(it != mapped_addresses.end()); 370
317 mapped_addresses.erase(it); 371 if constexpr (USE_MEMORY_MAPS) {
318 mapped_addresses_allocator.Release(map); 372 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
319 } 373 const u8* const mapped_memory = download_staging.mapped_span.data();
320 374 const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
321private: 375 for (BufferCopy& copy : copies) {
322 MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { 376 // Modify copies to have the staging offset in mind
323 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); 377 copy.dst_offset += download_staging.offset;
324 if (overlaps.empty()) {
325 const VAddr cpu_addr_end = cpu_addr + size;
326 if (gpu_memory.IsGranularRange(gpu_addr, size)) {
327 u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
328 block->Upload(block->Offset(cpu_addr), size, host_ptr);
329 } else {
330 staging_buffer.resize(size);
331 gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
332 block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
333 } 378 }
334 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); 379 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
335 } 380 runtime.Finish();
336 381 for (const BufferCopy& copy : copies) {
337 const VAddr cpu_addr_end = cpu_addr + size; 382 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
338 if (overlaps.size() == 1) { 383 // Undo the modified offset
339 MapInterval* const current_map = overlaps[0]; 384 const u64 dst_offset = copy.dst_offset - download_staging.offset;
340 if (current_map->IsInside(cpu_addr, cpu_addr_end)) { 385 const u8* copy_mapped_memory = mapped_memory + dst_offset;
341 return current_map; 386 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
387 }
388 } else {
389 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
390 for (const BufferCopy& copy : copies) {
391 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
392 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
393 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
342 } 394 }
343 } 395 }
344 VAddr new_start = cpu_addr; 396 });
345 VAddr new_end = cpu_addr_end; 397}
346 bool write_inheritance = false; 398
347 bool modified_inheritance = false; 399template <class P>
348 // Calculate new buffer parameters 400void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
349 for (MapInterval* overlap : overlaps) { 401 u32 size) {
350 new_start = std::min(overlap->start, new_start); 402 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
351 new_end = std::max(overlap->end, new_end); 403 if (!cpu_addr) {
352 write_inheritance |= overlap->is_written; 404 uniform_buffers[stage][index] = NULL_BINDING;
353 modified_inheritance |= overlap->is_modified; 405 return;
406 }
407 const Binding binding{
408 .cpu_addr = *cpu_addr,
409 .size = size,
410 .buffer_id = BufferId{},
411 };
412 uniform_buffers[stage][index] = binding;
413}
414
415template <class P>
416void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
417 MICROPROFILE_SCOPE(GPU_PrepareBuffers);
418 do {
419 has_deleted_buffers = false;
420 DoUpdateGraphicsBuffers(is_indexed);
421 } while (has_deleted_buffers);
422}
423
424template <class P>
425void BufferCache<P>::UpdateComputeBuffers() {
426 MICROPROFILE_SCOPE(GPU_PrepareBuffers);
427 do {
428 has_deleted_buffers = false;
429 DoUpdateComputeBuffers();
430 } while (has_deleted_buffers);
431}
432
433template <class P>
434void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
435 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
436 if (is_indexed) {
437 BindHostIndexBuffer();
438 } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
439 const auto& regs = maxwell3d.regs;
440 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
441 runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
354 } 442 }
355 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; 443 }
356 for (auto& overlap : overlaps) { 444 BindHostVertexBuffers();
357 Unregister(overlap); 445 BindHostTransformFeedbackBuffers();
446}
447
448template <class P>
449void BufferCache<P>::BindHostStageBuffers(size_t stage) {
450 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
451 BindHostGraphicsUniformBuffers(stage);
452 BindHostGraphicsStorageBuffers(stage);
453}
454
455template <class P>
456void BufferCache<P>::BindHostComputeBuffers() {
457 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
458 BindHostComputeUniformBuffers();
459 BindHostComputeStorageBuffers();
460}
461
462template <class P>
463void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
464 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
465 if (enabled_uniform_buffers[stage] != enabled) {
466 dirty_uniform_buffers[stage] = ~u32{0};
358 } 467 }
359 UpdateBlock(block, new_start, new_end, overlaps); 468 }
360 469 enabled_uniform_buffers[stage] = enabled;
361 const MapInterval new_map{new_start, new_end, new_gpu_addr}; 470}
362 MapInterval* const map = Register(new_map, write_inheritance); 471
363 if (!map) { 472template <class P>
364 return nullptr; 473void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
474 enabled_compute_uniform_buffers = enabled;
475}
476
477template <class P>
478void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
479 enabled_storage_buffers[stage] = 0;
480 written_storage_buffers[stage] = 0;
481}
482
483template <class P>
484void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
485 u32 cbuf_offset, bool is_written) {
486 enabled_storage_buffers[stage] |= 1U << ssbo_index;
487 written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
488
489 const auto& cbufs = maxwell3d.state.shader_stages[stage];
490 const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
491 storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
492}
493
494template <class P>
495void BufferCache<P>::UnbindComputeStorageBuffers() {
496 enabled_compute_storage_buffers = 0;
497 written_compute_storage_buffers = 0;
498}
499
500template <class P>
501void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
502 bool is_written) {
503 enabled_compute_storage_buffers |= 1U << ssbo_index;
504 written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
505
506 const auto& launch_desc = kepler_compute.launch_description;
507 ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
508
509 const auto& cbufs = launch_desc.const_buffer_config;
510 const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
511 compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
512}
513
514template <class P>
515void BufferCache<P>::FlushCachedWrites() {
516 for (const BufferId buffer_id : cached_write_buffer_ids) {
517 slot_buffers[buffer_id].FlushCachedWrites();
518 }
519 cached_write_buffer_ids.clear();
520}
521
522template <class P>
523bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
524 return !uncommitted_downloads.empty();
525}
526
527template <class P>
528bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
529 return !committed_downloads.empty() && !committed_downloads.front().empty();
530}
531
532template <class P>
533void BufferCache<P>::CommitAsyncFlushes() {
534 // This is intentionally passing the value by copy
535 committed_downloads.push_front(uncommitted_downloads);
536 uncommitted_downloads.clear();
537}
538
template <class P>
void BufferCache<P>::PopAsyncFlushes() {
    // Process and drop the oldest committed download batch, writing GPU-modified
    // buffer ranges back to guest CPU memory.
    if (committed_downloads.empty()) {
        return;
    }
    // Always pop the batch we are about to process, even on early return.
    auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
    const std::span<const BufferId> download_ids = committed_downloads.back();
    if (download_ids.empty()) {
        return;
    }
    MICROPROFILE_SCOPE(GPU_DownloadMemory);

    // Gather every modified range of every buffer in the batch into one list,
    // accumulating total and largest sizes for staging/scratch allocation.
    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    for (const BufferId buffer_id : download_ids) {
        slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
            downloads.push_back({
                BufferCopy{
                    .src_offset = range_offset,
                    .dst_offset = total_size_bytes,
                    .size = range_size,
                },
                buffer_id,
            });
            total_size_bytes += range_size;
            largest_copy = std::max(largest_copy, range_size);
        });
    }
    if (downloads.empty()) {
        return;
    }
    if constexpr (USE_MEMORY_MAPS) {
        // Batch all copies through a single staging buffer, then wait and write
        // the mapped results back to guest memory.
        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
        for (auto& [copy, buffer_id] : downloads) {
            // Have in mind the staging buffer offset for the copy
            copy.dst_offset += download_staging.offset;
            const std::array copies{copy};
            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
        }
        runtime.Finish();
        for (const auto [copy, buffer_id] : downloads) {
            const Buffer& buffer = slot_buffers[buffer_id];
            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
            // Undo the modified offset
            const u64 dst_offset = copy.dst_offset - download_staging.offset;
            const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
            cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
        }
    } else {
        // Fallback: download each range synchronously through a scratch buffer.
        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
        for (const auto [copy, buffer_id] : downloads) {
            Buffer& buffer = slot_buffers[buffer_id];
            buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
            cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
        }
    }
}
template <class P>
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
    // Walk the page table over [addr, addr + size) and ask each buffer found
    // whether the GPU has modified any part of the queried region.
    const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
    for (u64 page = addr >> PAGE_BITS; page < page_end;) {
        const BufferId image_id = page_table[page];
        if (!image_id) {
            // Unmapped page; advance to the next one.
            ++page;
            continue;
        }
        Buffer& buffer = slot_buffers[image_id];
        if (buffer.IsRegionGpuModified(addr, size)) {
            return true;
        }
        // Skip past the whole buffer: all of its pages map to the same entry.
        const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
        page = Common::DivCeil(end_addr, PAGE_SIZE);
    }
    return false;
}
396 VectorMapInterval result; 617
template <class P>
void BufferCache<P>::BindHostIndexBuffer() {
    // Upload pending CPU writes and bind the current index buffer.
    Buffer& buffer = slot_buffers[index_buffer.buffer_id];
    const u32 offset = buffer.Offset(index_buffer.cpu_addr);
    const u32 size = index_buffer.size;
    SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
    if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
        runtime.BindIndexBuffer(buffer, offset, size);
    } else {
        // Backends lacking full index/primitive support also receive the draw
        // state so they can emulate unsupported formats and topologies.
        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
                                buffer, offset, size);
    }
}
632
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
    auto& flags = maxwell3d.dirty.flags;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        const Binding& binding = vertex_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        // Always synchronize; only rebind when the per-buffer dirty flag is set.
        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
        }
        flags[Dirty::VertexBuffer0 + index] = false;

        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
        const u32 offset = buffer.Offset(binding.cpu_addr);
        runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
    }
}
650
template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
    // Consume the per-stage dirty mask (persistent-binding backends only) and
    // bind every enabled uniform buffer, rebinding only the dirty ones.
    u32 dirty = ~0U;
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        dirty = std::exchange(dirty_uniform_buffers[stage], 0);
    }
    u32 binding_index = 0;
    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
        const bool needs_bind = ((dirty >> index) & 1) != 0;
        BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
            // Only backends with indexed binds consume a binding slot per buffer.
            ++binding_index;
        }
    });
}
408 result.push_back(&*it); 666
template <class P>
void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
                                                   bool needs_bind) {
    const Binding& binding = uniform_buffers[stage][index];
    const VAddr cpu_addr = binding.cpu_addr;
    const u32 size = binding.size;
    Buffer& buffer = slot_buffers[binding.buffer_id];
    // Small uniform buffers that the GPU has not written can be re-uploaded
    // every draw instead of being read from the cached buffer.
    if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
        if constexpr (IS_OPENGL) {
            if (runtime.HasFastBufferSubData()) {
                // Fast path for Nvidia
                if (!HasFastUniformBufferBound(stage, binding_index)) {
                    // We only have to bind when the currently bound buffer is not the fast version
                    runtime.BindFastUniformBuffer(stage, binding_index, size);
                }
                const auto span = ImmediateBufferWithData(cpu_addr, size);
                runtime.PushFastUniformBuffer(stage, binding_index, span);
                return;
            }
        }
        // Remember that this slot holds a fast/streamed buffer, not the cached one.
        fast_bound_uniform_buffers[stage] |= 1U << binding_index;

        // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
        const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
        return;
    }
    // Classic cached path
    SynchronizeBuffer(buffer, cpu_addr, size);
    if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
        // Skip binding if it's not needed and if the bound buffer is not the fast version
        // This exists to avoid instances where the fast buffer is bound and a GPU write happens
        return;
    }
    // The cached buffer replaces any fast buffer previously bound to this slot.
    fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);

    const u32 offset = buffer.Offset(cpu_addr);
    if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
        runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
    } else {
        runtime.BindUniformBuffer(buffer, offset, size);
    }
}
710
711template <class P>
712void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
713 u32 binding_index = 0;
714 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
715 const Binding& binding = storage_buffers[stage][index];
716 Buffer& buffer = slot_buffers[binding.buffer_id];
717 const u32 size = binding.size;
718 SynchronizeBuffer(buffer, binding.cpu_addr, size);
719
720 const u32 offset = buffer.Offset(binding.cpu_addr);
721 const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
722 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
723 runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
724 ++binding_index;
725 } else {
726 runtime.BindStorageBuffer(buffer, offset, size, is_written);
727 }
728 });
729}
443 730
template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
    // Nothing to bind when transform feedback is disabled.
    if (maxwell3d.regs.tfb_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
        const Binding& binding = transform_feedback_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
    }
}
450 746
template <class P>
void BufferCache<P>::BindHostComputeUniformBuffers() {
    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
        // Mark all uniform buffers as dirty
        dirty_uniform_buffers.fill(~u32{0});
    }
    u32 binding_index = 0;
    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
        const Binding& binding = compute_uniform_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

        const u32 offset = buffer.Offset(binding.cpu_addr);
        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
            runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
            ++binding_index;
        } else {
            // Backends without indexed binds reuse the graphics entry point.
            runtime.BindUniformBuffer(buffer, offset, size);
        }
    });
}
769
770template <class P>
771void BufferCache<P>::BindHostComputeStorageBuffers() {
772 u32 binding_index = 0;
773 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
774 const Binding& binding = compute_storage_buffers[index];
775 Buffer& buffer = slot_buffers[binding.buffer_id];
776 const u32 size = binding.size;
777 SynchronizeBuffer(buffer, binding.cpu_addr, size);
778
779 const u32 offset = buffer.Offset(binding.cpu_addr);
780 const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
781 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
782 runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
783 ++binding_index;
784 } else {
785 runtime.BindStorageBuffer(buffer, offset, size, is_written);
786 }
787 });
788}
464 789
template <class P>
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
    // Refresh all guest buffer bindings for the next draw.
    if (is_indexed) {
        UpdateIndexBuffer();
    }
    UpdateVertexBuffers();
    UpdateTransformFeedbackBuffers();
    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
        UpdateUniformBuffers(stage);
        UpdateStorageBuffers(stage);
    }
}
802
template <class P>
void BufferCache<P>::DoUpdateComputeBuffers() {
    // Refresh all guest buffer bindings for the next compute dispatch.
    UpdateComputeUniformBuffers();
    UpdateComputeStorageBuffers();
}
808
809template <class P>
810void BufferCache<P>::UpdateIndexBuffer() {
811 // We have to check for the dirty flags and index count
812 // The index count is currently changed without updating the dirty flags
813 const auto& index_array = maxwell3d.regs.index_array;
814 auto& flags = maxwell3d.dirty.flags;
815 if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
816 return;
817 }
818 flags[Dirty::IndexBuffer] = false;
819 last_index_count = index_array.count;
820
821 const GPUVAddr gpu_addr_begin = index_array.StartAddress();
822 const GPUVAddr gpu_addr_end = index_array.EndAddress();
823 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
824 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
825 const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
826 const u32 size = std::min(address_size, draw_size);
827 if (size == 0 || !cpu_addr) {
828 index_buffer = NULL_BINDING;
829 return;
830 }
831 index_buffer = Binding{
832 .cpu_addr = *cpu_addr,
833 .size = size,
834 .buffer_id = FindBuffer(*cpu_addr, size),
835 };
836}
467 837
468 std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first, 838template <class P>
469 std::shared_ptr<Buffer> second) { 839void BufferCache<P>::UpdateVertexBuffers() {
470 const std::size_t size_1 = first->Size(); 840 auto& flags = maxwell3d.dirty.flags;
471 const std::size_t size_2 = second->Size(); 841 if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
472 const VAddr first_addr = first->CpuAddr(); 842 return;
473 const VAddr second_addr = second->CpuAddr(); 843 }
474 const VAddr new_addr = std::min(first_addr, second_addr); 844 flags[Dirty::VertexBuffers] = false;
475 const std::size_t new_size = size_1 + size_2;
476
477 std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
478 new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
479 new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
480 QueueDestruction(std::move(first));
481 QueueDestruction(std::move(second));
482 845
483 const VAddr cpu_addr_end = new_addr + new_size - 1; 846 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
484 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; 847 UpdateVertexBuffer(index);
485 for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
486 blocks.insert_or_assign(page_start, new_buffer);
487 }
488 return new_buffer;
489 } 848 }
849}
490 850
template <class P>
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
    // Refresh one vertex buffer binding; the per-buffer dirty flag is cleared
    // later by the bind pass, not here.
    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
        return;
    }
    const auto& array = maxwell3d.regs.vertex_array[index];
    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
    const GPUVAddr gpu_addr_begin = array.StartAddress();
    // The limit register is inclusive, hence the +1.
    const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
    const u32 size = address_size; // TODO: Analyze stride and number of vertices
    if (array.enable == 0 || size == 0 || !cpu_addr) {
        vertex_buffers[index] = NULL_BINDING;
        return;
    }
    vertex_buffers[index] = Binding{
        .cpu_addr = *cpu_addr,
        .size = size,
        .buffer_id = FindBuffer(*cpu_addr, size),
    };
}
873
874template <class P>
875void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
876 ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
877 Binding& binding = uniform_buffers[stage][index];
878 if (binding.buffer_id) {
879 // Already updated
880 return;
881 }
882 // Mark as dirty
883 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
884 dirty_uniform_buffers[stage] |= 1U << index;
885 }
886 // Resolve buffer
887 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
888 });
889}
890
891template <class P>
892void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
893 const u32 written_mask = written_storage_buffers[stage];
894 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
895 // Resolve buffer
896 Binding& binding = storage_buffers[stage][index];
897 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
898 binding.buffer_id = buffer_id;
899 // Mark buffer as written if needed
900 if (((written_mask >> index) & 1) != 0) {
901 MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
902 }
903 });
904}
493 905
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
    // Skip entirely when transform feedback is disabled.
    if (maxwell3d.regs.tfb_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
        UpdateTransformFeedbackBuffer(index);
    }
}
503 const VAddr start_addr = page_start << BLOCK_PAGE_BITS; 915
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
    // Refresh one transform feedback binding from the guest registers.
    const auto& binding = maxwell3d.regs.tfb_bindings[index];
    const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
    const u32 size = binding.buffer_size;
    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
    if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
        transform_feedback_buffers[index] = NULL_BINDING;
        return;
    }
    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
    transform_feedback_buffers[index] = Binding{
        .cpu_addr = *cpu_addr,
        .size = size,
        .buffer_id = buffer_id,
    };
    // Transform feedback always writes, so flag the region as GPU-modified.
    MarkWrittenBuffer(buffer_id, *cpu_addr, size);
}
934
template <class P>
void BufferCache<P>::UpdateComputeUniformBuffers() {
    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
        Binding& binding = compute_uniform_buffers[index];
        // Start from a null binding; fill it in only when the launch enables
        // this const buffer and its address translates to CPU memory.
        binding = NULL_BINDING;
        const auto& launch_desc = kepler_compute.launch_description;
        if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
            const auto& cbuf = launch_desc.const_buffer_config[index];
            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
            if (cpu_addr) {
                binding.cpu_addr = *cpu_addr;
                binding.size = cbuf.size;
            }
        }
        // With a null binding this resolves to the null buffer.
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
    });
}
952
953template <class P>
954void BufferCache<P>::UpdateComputeStorageBuffers() {
955 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
956 // Resolve buffer
957 Binding& binding = compute_storage_buffers[index];
958 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
959 binding.buffer_id = buffer_id;
960 // Mark as written if needed
961 if (((written_compute_storage_buffers >> index) & 1) != 0) {
962 MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
963 }
964 });
965}
966
template <class P>
void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
    Buffer& buffer = slot_buffers[buffer_id];
    buffer.MarkRegionAsGpuModified(cpu_addr, size);

    // Downloads are only queued when both high GPU accuracy and asynchronous
    // GPU emulation are enabled.
    const bool is_accuracy_high = Settings::IsGPULevelHigh();
    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
    if (!is_accuracy_high || !is_async) {
        return;
    }
    if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
        // Already inserted
        return;
    }
    uncommitted_downloads.push_back(buffer_id);
}
518 983
template <class P>
BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
    // Null bindings resolve to the dedicated null buffer.
    if (cpu_addr == 0) {
        return NULL_BUFFER_ID;
    }
    const u64 page = cpu_addr >> PAGE_BITS;
    const BufferId buffer_id = page_table[page];
    if (!buffer_id) {
        // No buffer covers this page yet.
        return CreateBuffer(cpu_addr, size);
    }
    const Buffer& buffer = slot_buffers[buffer_id];
    if (buffer.IsInBounds(cpu_addr, size)) {
        return buffer_id;
    }
    // The existing buffer is too small; create a larger one that absorbs it.
    return CreateBuffer(cpu_addr, size);
}
1000
template <class P>
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
                                                                       u32 wanted_size) {
    static constexpr int STREAM_LEAP_THRESHOLD = 16;
    std::vector<BufferId> overlap_ids;
    VAddr begin = cpu_addr;
    VAddr end = cpu_addr + wanted_size;
    int stream_score = 0;
    bool has_stream_leap = false;
    // Scan page by page. Note that 'end' (and 'cpu_addr'/'begin') can change
    // while iterating as overlapping buffers extend the range in both
    // directions; the loop condition is re-evaluated against the new bounds.
    for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
        const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
        if (!overlap_id) {
            continue;
        }
        Buffer& overlap = slot_buffers[overlap_id];
        if (overlap.IsPicked()) {
            // Already collected during this resolve pass.
            continue;
        }
        overlap_ids.push_back(overlap_id);
        overlap.Pick();
        const VAddr overlap_cpu_addr = overlap.CpuAddr();
        if (overlap_cpu_addr < begin) {
            // Restart the scan from the new, lower beginning.
            cpu_addr = begin = overlap_cpu_addr;
        }
        end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());

        stream_score += overlap.StreamScore();
        if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
            // When this memory region has been joined a bunch of times, we assume it's being used
            // as a stream buffer. Increase the size to skip constantly recreating buffers.
            has_stream_leap = true;
            end += PAGE_SIZE * 256;
        }
    }
    return OverlapResult{
        .ids = std::move(overlap_ids),
        .begin = begin,
        .end = end,
        .has_stream_leap = has_stream_leap,
    };
}
template <class P>
void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
                                 bool accumulate_stream_score) {
    Buffer& new_buffer = slot_buffers[new_buffer_id];
    Buffer& overlap = slot_buffers[overlap_id];
    if (accumulate_stream_score) {
        // Inherit the overlap's stream score (+1 for this join) so repeatedly
        // rebuilt regions can be detected as stream buffers.
        new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
    }
    // Copy every GPU-modified range of the old buffer into the new one.
    std::vector<BufferCopy> copies;
    const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
    overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
        copies.push_back(BufferCopy{
            .src_offset = begin,
            .dst_offset = dst_base_offset + begin,
            .size = range_size,
        });
        // NOTE(review): the new buffer is marked at 'begin', not
        // 'dst_base_offset + begin' — confirm whether ForEachDownloadRange
        // yields offsets relative to the new buffer or this is off by
        // dst_base_offset when the overlap does not start at the new base.
        new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
        new_buffer.MarkRegionAsGpuModified(begin, range_size);
    });
    if (!copies.empty()) {
        runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
    }
    // Redirect queued downloads to the new buffer before destroying the old one.
    ReplaceBufferDownloads(overlap_id, new_buffer_id);
    DeleteBuffer(overlap_id);
}
1068
template <class P>
BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
    // Grow the requested range to cover every overlapping buffer, build one
    // buffer replacing them all, and register it in the page table.
    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
    const u32 size = static_cast<u32>(overlap.end - overlap.begin);
    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
    for (const BufferId overlap_id : overlap.ids) {
        // Skip stream-score accumulation when a stream leap was already applied.
        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
    }
    Register(new_buffer_id);
    return new_buffer_id;
}
1080
template <class P>
void BufferCache<P>::Register(BufferId buffer_id) {
    // Insert the buffer's pages into the page table.
    ChangeRegister<true>(buffer_id);
}
1085
template <class P>
void BufferCache<P>::Unregister(BufferId buffer_id) {
    // Remove the buffer's pages from the page table.
    ChangeRegister<false>(buffer_id);
}
1090
template <class P>
template <bool insert>
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
    // Point every page spanned by the buffer at it (insert) or clear the
    // corresponding page table entries (remove).
    const Buffer& buffer = slot_buffers[buffer_id];
    const VAddr cpu_addr_begin = buffer.CpuAddr();
    const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
    const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
    const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
    for (u64 page = page_begin; page != page_end; ++page) {
        if constexpr (insert) {
            page_table[page] = buffer_id;
        } else {
            page_table[page] = BufferId{};
        }
    }
}
541 1107
template <class P>
void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
    // The null buffer (CPU address 0) never needs synchronization.
    if (buffer.CpuAddr() == 0) {
        return;
    }
    SynchronizeBufferImpl(buffer, cpu_addr, size);
}
1115
1116template <class P>
1117void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
1118 boost::container::small_vector<BufferCopy, 4> copies;
1119 u64 total_size_bytes = 0;
1120 u64 largest_copy = 0;
1121 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
1122 copies.push_back(BufferCopy{
1123 .src_offset = total_size_bytes,
1124 .dst_offset = range_offset,
1125 .size = range_size,
1126 });
1127 total_size_bytes += range_size;
1128 largest_copy = std::max(largest_copy, range_size);
1129 });
1130 if (total_size_bytes == 0) {
1131 return;
1132 }
1133 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1134 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1135}
1136
template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                                  std::span<BufferCopy> copies) {
    // Pick the upload strategy at compile time: staging maps vs direct uploads.
    if constexpr (USE_MEMORY_MAPS) {
        MappedUploadMemory(buffer, total_size_bytes, copies);
    } else {
        ImmediateUploadMemory(buffer, largest_copy, copies);
    }
}
1146
template <class P>
void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
                                           std::span<const BufferCopy> copies) {
    std::span<u8> immediate_buffer;
    for (const BufferCopy& copy : copies) {
        std::span<const u8> upload_span;
        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
        if (IsRangeGranular(cpu_addr, copy.size)) {
            // Contiguous in host memory; upload straight from the guest pointer.
            upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
        } else {
            // Lazily allocate a scratch buffer and gather the range into it.
            if (immediate_buffer.empty()) {
                immediate_buffer = ImmediateBuffer(largest_copy);
            }
            cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
            upload_span = immediate_buffer.subspan(0, copy.size);
        }
        buffer.ImmediateUpload(copy.dst_offset, upload_span);
    }
}
552 void QueueDestruction(std::shared_ptr<Buffer> buffer) { 1166
template <class P>
void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
                                        std::span<BufferCopy> copies) {
    auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
    const std::span<u8> staging_pointer = upload_staging.mapped_span;
    for (BufferCopy& copy : copies) {
        // Gather guest data into the mapped staging buffer.
        u8* const src_pointer = staging_pointer.data() + copy.src_offset;
        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
        cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);

        // Apply the staging offset
        copy.src_offset += upload_staging.offset;
    }
    runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
}
558 if (!uncommitted_flushes) { 1182
template <class P>
void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
    // Null out every binding that still references the dying buffer.
    const auto scalar_replace = [buffer_id](Binding& binding) {
        if (binding.buffer_id == buffer_id) {
            binding.buffer_id = BufferId{};
        }
    };
    const auto replace = [scalar_replace](std::span<Binding> bindings) {
        std::ranges::for_each(bindings, scalar_replace);
    };
    scalar_replace(index_buffer);
    replace(vertex_buffers);
    std::ranges::for_each(uniform_buffers, replace);
    std::ranges::for_each(storage_buffers, replace);
    replace(transform_feedback_buffers);
    replace(compute_uniform_buffers);
    replace(compute_storage_buffers);
    std::erase(cached_write_buffer_ids, buffer_id);

    // Mark the whole buffer as CPU written to stop tracking CPU writes
    Buffer& buffer = slot_buffers[buffer_id];
    buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());

    Unregister(buffer_id);
    // Defer destruction; the GPU may still be using the buffer this frame.
    delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));

    NotifyBufferDeletion();
}
1211
1212template <class P>
1213void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
1214 const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
1215 std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
1216 if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
1217 buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
560 } 1218 }
561 uncommitted_flushes->insert(map); 1219 };
1220 replace(uncommitted_downloads);
1221 std::ranges::for_each(committed_downloads, replace);
1222}
1223
1224template <class P>
1225void BufferCache<P>::NotifyBufferDeletion() {
1226 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1227 dirty_uniform_buffers.fill(~u32{0});
562 } 1228 }
1229 auto& flags = maxwell3d.dirty.flags;
1230 flags[Dirty::IndexBuffer] = true;
1231 flags[Dirty::VertexBuffers] = true;
1232 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
1233 flags[Dirty::VertexBuffer0 + index] = true;
1234 }
1235 has_deleted_buffers = true;
1236}
1237
1238template <class P>
1239typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
1240 const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
1241 const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
1242 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1243 if (!cpu_addr || size == 0) {
1244 return NULL_BINDING;
1245 }
1246 // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
1247 // It exists due to some games like Astral Chain operate out of bounds.
1248 // Binding the whole map range would be technically correct, but games have large maps that make
1249 // this approach unaffordable for now.
1250 static constexpr u32 arbitrary_extra_bytes = 0xc000;
1251 const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
1252 const Binding binding{
1253 .cpu_addr = *cpu_addr,
1254 .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
1255 .buffer_id = BufferId{},
1256 };
1257 return binding;
1258}
1259
1260template <class P>
1261std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
1262 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
1263 if (IsRangeGranular(cpu_addr, size) ||
1264 base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
1265 return std::span(base_pointer, size);
1266 } else {
1267 const std::span<u8> span = ImmediateBuffer(size);
1268 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
1269 return span;
1270 }
1271}
563 1272
564 VideoCore::RasterizerInterface& rasterizer; 1273template <class P>
565 Tegra::MemoryManager& gpu_memory; 1274std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
566 Core::Memory::Memory& cpu_memory; 1275 if (wanted_capacity > immediate_buffer_capacity) {
567 StreamBuffer& stream_buffer; 1276 immediate_buffer_capacity = wanted_capacity;
568 1277 immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
569 u8* buffer_ptr = nullptr; 1278 }
570 u64 buffer_offset = 0; 1279 return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
571 u64 buffer_offset_base = 0; 1280}
572 1281
573 MapIntervalAllocator mapped_addresses_allocator; 1282template <class P>
574 boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> 1283bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
575 mapped_addresses; 1284 if constexpr (IS_OPENGL) {
576 1285 return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
577 std::unordered_map<u64, u32> written_pages; 1286 } else {
578 std::unordered_map<u64, std::shared_ptr<Buffer>> blocks; 1287 // Only OpenGL has fast uniform buffers
579 1288 return false;
580 std::queue<std::shared_ptr<Buffer>> pending_destruction; 1289 }
581 u64 epoch = 0; 1290}
582 u64 modified_ticks = 0;
583
584 std::vector<u8> staging_buffer;
585
586 std::list<MapInterval*> marked_for_unregister;
587
588 std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
589 std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
590
591 std::recursive_mutex mutex;
592};
593 1291
594} // namespace VideoCommon 1292} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a..000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <memory>
9
10#include "video_core/buffer_cache/map_interval.h"
11
12namespace VideoCommon {
13
14MapIntervalAllocator::MapIntervalAllocator() {
15 FillFreeList(first_chunk);
16}
17
18MapIntervalAllocator::~MapIntervalAllocator() = default;
19
20void MapIntervalAllocator::AllocateNewChunk() {
21 *new_chunk = std::make_unique<Chunk>();
22 FillFreeList(**new_chunk);
23 new_chunk = &(*new_chunk)->next;
24}
25
26void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
27 const std::size_t old_size = free_list.size();
28 free_list.resize(old_size + chunk.data.size());
29 std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
30 [](MapInterval& interval) { return &interval; });
31}
32
33} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index ef974b08a..000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <vector>
11
12#include <boost/intrusive/set_hook.hpp>
13
14#include "common/common_types.h"
15#include "video_core/gpu.h"
16
17namespace VideoCommon {
18
19struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
20 MapInterval() = default;
21
22 /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
23
24 explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
25 : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
26
27 bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
28 return start <= other_start && other_end <= end;
29 }
30
31 bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
32 return start < other_end && other_start < end;
33 }
34
35 void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
36 is_modified = is_modified_;
37 ticks = ticks_;
38 }
39
40 boost::intrusive::set_member_hook<> member_hook_;
41 VAddr start = 0;
42 VAddr end = 0;
43 GPUVAddr gpu_addr = 0;
44 u64 ticks = 0;
45 bool is_written = false;
46 bool is_modified = false;
47 bool is_registered = false;
48 bool is_memory_marked = false;
49 bool is_sync_pending = false;
50};
51
52struct MapIntervalCompare {
53 constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
54 return lhs.start < rhs.start;
55 }
56};
57
58class MapIntervalAllocator {
59public:
60 MapIntervalAllocator();
61 ~MapIntervalAllocator();
62
63 MapInterval* Allocate() {
64 if (free_list.empty()) {
65 AllocateNewChunk();
66 }
67 MapInterval* const interval = free_list.back();
68 free_list.pop_back();
69 return interval;
70 }
71
72 void Release(MapInterval* interval) {
73 free_list.push_back(interval);
74 }
75
76private:
77 struct Chunk {
78 std::unique_ptr<Chunk> next;
79 std::array<MapInterval, 0x8000> data;
80 };
81
82 void AllocateNewChunk();
83
84 void FillFreeList(Chunk& chunk);
85
86 std::vector<MapInterval*> free_list;
87
88 Chunk first_chunk;
89
90 std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
91};
92
93} // namespace VideoCommon
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 55e632346..2b7569335 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -110,12 +110,10 @@ void Vic::Execute() {
110 converted_frame_buffer.get(), block_height, 0, 0); 110 converted_frame_buffer.get(), block_height, 0, 0);
111 111
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); 112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
113 gpu.Maxwell3D().OnMemoryWrite();
114 } else { 113 } else {
115 // send pitch linear frame 114 // send pitch linear frame
116 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, 115 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
117 linear_size); 116 linear_size);
118 gpu.Maxwell3D().OnMemoryWrite();
119 } 117 }
120 break; 118 break;
121 } 119 }
@@ -163,7 +161,6 @@ void Vic::Execute() {
163 } 161 }
164 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), 162 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
165 chroma_buffer.size()); 163 chroma_buffer.size());
166 gpu.Maxwell3D().OnMemoryWrite();
167 break; 164 break;
168 } 165 }
169 default: 166 default:
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index b1eaac00c..7149af290 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -12,13 +12,30 @@
12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) 12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
13 13
14namespace VideoCommon::Dirty { 14namespace VideoCommon::Dirty {
15 15namespace {
16using Tegra::Engines::Maxwell3D; 16using Tegra::Engines::Maxwell3D;
17 17
18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { 18void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
19 static constexpr std::size_t num_array = 3;
20 for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
21 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
22 const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
23
24 FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
25 FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
26 }
27}
28
29void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
30 FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
31}
32
33void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
19 FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); 34 FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
20 FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); 35 FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
36}
21 37
38void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
22 static constexpr std::size_t num_per_rt = NUM(rt[0]); 39 static constexpr std::size_t num_per_rt = NUM(rt[0]);
23 static constexpr std::size_t begin = OFF(rt); 40 static constexpr std::size_t begin = OFF(rt);
24 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; 41 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
41 FillBlock(table, OFF(zeta), NUM(zeta), flag); 58 FillBlock(table, OFF(zeta), NUM(zeta), flag);
42 } 59 }
43} 60}
61} // Anonymous namespace
62
63void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
64 SetupDirtyVertexBuffers(tables);
65 SetupIndexBuffer(tables);
66 SetupDirtyDescriptors(tables);
67 SetupDirtyRenderTargets(tables);
68}
44 69
45} // namespace VideoCommon::Dirty 70} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 875527ddd..702688ace 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -30,6 +30,12 @@ enum : u8 {
30 ColorBuffer7, 30 ColorBuffer7,
31 ZetaBuffer, 31 ZetaBuffer,
32 32
33 VertexBuffers,
34 VertexBuffer0,
35 VertexBuffer31 = VertexBuffer0 + 31,
36
37 IndexBuffer,
38
33 LastCommonEntry, 39 LastCommonEntry,
34}; 40};
35 41
@@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
47 FillBlock(tables[1], begin, num, index_b); 53 FillBlock(tables[1], begin, num, index_b);
48} 54}
49 55
50void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); 56void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
51 57
52} // namespace VideoCommon::Dirty 58} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 2c8b20024..8b33c04ab 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
23 MICROPROFILE_SCOPE(DispatchCalls); 23 MICROPROFILE_SCOPE(DispatchCalls);
24 24
25 gpu.SyncGuestHost(); 25 gpu.SyncGuestHost();
26 // On entering GPU code, assume all memory may be touched by the ARM core.
27 gpu.Maxwell3D().OnMemoryWrite();
28 26
29 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
30 28
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index a01d334ad..0f640fdae 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
18 18
19Fermi2D::~Fermi2D() = default; 19Fermi2D::~Fermi2D() = default;
20 20
21void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 21void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
22 rasterizer = &rasterizer_; 22 rasterizer = rasterizer_;
23} 23}
24 24
25void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 25void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0de3280a2..c808a577d 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
38 ~Fermi2D(); 38 ~Fermi2D();
39 39
40 /// Binds a rasterizer to this engine. 40 /// Binds a rasterizer to this engine.
41 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 41 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
42 42
43 /// Write the value to the register identified by method. 43 /// Write the value to the register identified by method.
44 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; 44 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ba387506e..a9b75091e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
21 21
22KeplerCompute::~KeplerCompute() = default; 22KeplerCompute::~KeplerCompute() = default;
23 23
24void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 24void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
25 rasterizer = &rasterizer_; 25 rasterizer = rasterizer_;
26} 26}
27 27
28void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 28void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
39 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 39 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
40 upload_state.ProcessData(method_argument, is_last_call); 40 upload_state.ProcessData(method_argument, is_last_call);
41 if (is_last_call) { 41 if (is_last_call) {
42 system.GPU().Maxwell3D().OnMemoryWrite();
43 } 42 }
44 break; 43 break;
45 } 44 }
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 9f0a7b76d..7c40cba38 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
46 ~KeplerCompute(); 46 ~KeplerCompute();
47 47
48 /// Binds a rasterizer to this engine. 48 /// Binds a rasterizer to this engine.
49 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 49 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
50 50
51 static constexpr std::size_t NumConstBuffers = 8; 51 static constexpr std::size_t NumConstBuffers = 8;
52 52
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 9911140e9..560551157 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
33 case KEPLERMEMORY_REG_INDEX(data): { 33 case KEPLERMEMORY_REG_INDEX(data): {
34 upload_state.ProcessData(method_argument, is_last_call); 34 upload_state.ProcessData(method_argument, is_last_call);
35 if (is_last_call) { 35 if (is_last_call) {
36 system.GPU().Maxwell3D().OnMemoryWrite();
37 } 36 }
38 break; 37 break;
39 } 38 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 116ad1722..75517a4f7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
30 30
31Maxwell3D::~Maxwell3D() = default; 31Maxwell3D::~Maxwell3D() = default;
32 32
33void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 33void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
34 rasterizer = &rasterizer_; 34 rasterizer = rasterizer_;
35} 35}
36 36
37void Maxwell3D::InitializeRegisterDefaults() { 37void Maxwell3D::InitializeRegisterDefaults() {
@@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
223 case MAXWELL3D_REG_INDEX(data_upload): 223 case MAXWELL3D_REG_INDEX(data_upload):
224 upload_state.ProcessData(argument, is_last_call); 224 upload_state.ProcessData(argument, is_last_call);
225 if (is_last_call) { 225 if (is_last_call) {
226 OnMemoryWrite();
227 } 226 }
228 return; 227 return;
229 case MAXWELL3D_REG_INDEX(fragment_barrier): 228 case MAXWELL3D_REG_INDEX(fragment_barrier):
@@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
570 } 569 }
571} 570}
572 571
573void Maxwell3D::ProcessCBBind(std::size_t stage_index) { 572void Maxwell3D::ProcessCBBind(size_t stage_index) {
574 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 573 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
575 auto& shader = state.shader_stages[stage_index]; 574 const auto& bind_data = regs.cb_bind[stage_index];
576 auto& bind_data = regs.cb_bind[stage_index]; 575 auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
577
578 ASSERT(bind_data.index < Regs::MaxConstBuffers);
579 auto& buffer = shader.const_buffers[bind_data.index];
580
581 buffer.enabled = bind_data.valid.Value() != 0; 576 buffer.enabled = bind_data.valid.Value() != 0;
582 buffer.address = regs.const_buffer.BufferAddress(); 577 buffer.address = regs.const_buffer.BufferAddress();
583 buffer.size = regs.const_buffer.cb_size; 578 buffer.size = regs.const_buffer.cb_size;
579
580 const bool is_enabled = bind_data.valid.Value() != 0;
581 const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
582 const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
583 rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
584} 584}
585 585
586void Maxwell3D::ProcessCBData(u32 value) { 586void Maxwell3D::ProcessCBData(u32 value) {
@@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
635 635
636 const u32 id = cb_data_state.id; 636 const u32 id = cb_data_state.id;
637 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); 637 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
638 OnMemoryWrite();
639 638
640 cb_data_state.id = null_cb_data; 639 cb_data_state.id = null_cb_data;
641 cb_data_state.current = null_cb_data; 640 cb_data_state.current = null_cb_data;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 002d1b3f9..ffed42a29 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
55 ~Maxwell3D(); 55 ~Maxwell3D();
56 56
57 /// Binds a rasterizer to this engine. 57 /// Binds a rasterizer to this engine.
58 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 58 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
59 59
60 /// Register structure of the Maxwell3D engine. 60 /// Register structure of the Maxwell3D engine.
61 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. 61 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1314,8 +1314,7 @@ public:
1314 1314
1315 GPUVAddr LimitAddress() const { 1315 GPUVAddr LimitAddress() const {
1316 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | 1316 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
1317 limit_low) + 1317 limit_low);
1318 1;
1319 } 1318 }
1320 } vertex_array_limit[NumVertexArrays]; 1319 } vertex_array_limit[NumVertexArrays];
1321 1320
@@ -1403,6 +1402,7 @@ public:
1403 }; 1402 };
1404 1403
1405 std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; 1404 std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
1405
1406 u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. 1406 u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
1407 }; 1407 };
1408 1408
@@ -1452,11 +1452,6 @@ public:
1452 return *rasterizer; 1452 return *rasterizer;
1453 } 1453 }
1454 1454
1455 /// Notify a memory write has happened.
1456 void OnMemoryWrite() {
1457 dirty.flags |= dirty.on_write_stores;
1458 }
1459
1460 enum class MMEDrawMode : u32 { 1455 enum class MMEDrawMode : u32 {
1461 Undefined, 1456 Undefined,
1462 Array, 1457 Array,
@@ -1478,7 +1473,6 @@ public:
1478 using Tables = std::array<Table, 2>; 1473 using Tables = std::array<Table, 2>;
1479 1474
1480 Flags flags; 1475 Flags flags;
1481 Flags on_write_stores;
1482 Tables tables{}; 1476 Tables tables{};
1483 } dirty; 1477 } dirty;
1484 1478
@@ -1541,7 +1535,7 @@ private:
1541 void FinishCBData(); 1535 void FinishCBData();
1542 1536
1543 /// Handles a write to the CB_BIND register. 1537 /// Handles a write to the CB_BIND register.
1544 void ProcessCBBind(std::size_t stage_index); 1538 void ProcessCBBind(size_t stage_index);
1545 1539
1546 /// Handles a write to the VERTEX_END_GL register, triggering a draw. 1540 /// Handles a write to the VERTEX_END_GL register, triggering a draw.
1547 void DrawArrays(); 1541 void DrawArrays();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ba750748c..a2f19559f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
60 return; 60 return;
61 } 61 }
62 62
63 // All copies here update the main memory, so mark all rasterizer states as invalid.
64 system.GPU().Maxwell3D().OnMemoryWrite();
65
66 if (is_src_pitch && is_dst_pitch) { 63 if (is_src_pitch && is_dst_pitch) {
67 CopyPitchToPitch(); 64 CopyPitchToPitch();
68 } else { 65 } else {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 3512283ff..f055b61e9 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -143,22 +143,26 @@ private:
143 } 143 }
144 144
145 bool ShouldWait() const { 145 bool ShouldWait() const {
146 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
146 return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || 147 return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
147 query_cache.ShouldWaitAsyncFlushes(); 148 query_cache.ShouldWaitAsyncFlushes();
148 } 149 }
149 150
150 bool ShouldFlush() const { 151 bool ShouldFlush() const {
152 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
151 return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || 153 return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
152 query_cache.HasUncommittedFlushes(); 154 query_cache.HasUncommittedFlushes();
153 } 155 }
154 156
155 void PopAsyncFlushes() { 157 void PopAsyncFlushes() {
158 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
156 texture_cache.PopAsyncFlushes(); 159 texture_cache.PopAsyncFlushes();
157 buffer_cache.PopAsyncFlushes(); 160 buffer_cache.PopAsyncFlushes();
158 query_cache.PopAsyncFlushes(); 161 query_cache.PopAsyncFlushes();
159 } 162 }
160 163
161 void CommitAsyncFlushes() { 164 void CommitAsyncFlushes() {
165 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
162 texture_cache.CommitAsyncFlushes(); 166 texture_cache.CommitAsyncFlushes();
163 buffer_cache.CommitAsyncFlushes(); 167 buffer_cache.CommitAsyncFlushes();
164 query_cache.CommitAsyncFlushes(); 168 query_cache.CommitAsyncFlushes();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ab06775f..2a9bd4121 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -44,8 +44,8 @@ GPU::~GPU() = default;
44 44
45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
46 renderer = std::move(renderer_); 46 renderer = std::move(renderer_);
47 rasterizer = renderer->ReadRasterizer();
47 48
48 VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
49 memory_manager->BindRasterizer(rasterizer); 49 memory_manager->BindRasterizer(rasterizer);
50 maxwell_3d->BindRasterizer(rasterizer); 50 maxwell_3d->BindRasterizer(rasterizer);
51 fermi_2d->BindRasterizer(rasterizer); 51 fermi_2d->BindRasterizer(rasterizer);
@@ -171,7 +171,7 @@ void GPU::TickWork() {
171 const std::size_t size = request.size; 171 const std::size_t size = request.size;
172 flush_requests.pop_front(); 172 flush_requests.pop_front();
173 flush_request_mutex.unlock(); 173 flush_request_mutex.unlock();
174 renderer->Rasterizer().FlushRegion(addr, size); 174 rasterizer->FlushRegion(addr, size);
175 current_flush_fence.store(fence); 175 current_flush_fence.store(fence);
176 flush_request_mutex.lock(); 176 flush_request_mutex.lock();
177 } 177 }
@@ -193,11 +193,11 @@ u64 GPU::GetTicks() const {
193} 193}
194 194
195void GPU::FlushCommands() { 195void GPU::FlushCommands() {
196 renderer->Rasterizer().FlushCommands(); 196 rasterizer->FlushCommands();
197} 197}
198 198
199void GPU::SyncGuestHost() { 199void GPU::SyncGuestHost() {
200 renderer->Rasterizer().SyncGuestHost(); 200 rasterizer->SyncGuestHost();
201} 201}
202 202
203enum class GpuSemaphoreOperation { 203enum class GpuSemaphoreOperation {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b4ce6b154..b2ee45496 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -366,6 +366,7 @@ protected:
366 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 366 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
367 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; 367 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
368 std::unique_ptr<VideoCore::RendererBase> renderer; 368 std::unique_ptr<VideoCore::RendererBase> renderer;
369 VideoCore::RasterizerInterface* rasterizer = nullptr;
369 const bool use_nvdec; 370 const bool use_nvdec;
370 371
371private: 372private:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 7e490bcc3..50319f1d5 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
38 } 38 }
39 39
40 auto current_context = context.Acquire(); 40 auto current_context = context.Acquire();
41 VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
41 42
42 CommandDataContainer next; 43 CommandDataContainer next;
43 while (state.is_running) { 44 while (state.is_running) {
@@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
52 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { 53 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
53 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 54 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
54 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 55 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
55 renderer.Rasterizer().ReleaseFences(); 56 rasterizer->ReleaseFences();
56 } else if (std::holds_alternative<GPUTickCommand>(next.data)) { 57 } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
57 system.GPU().TickWork(); 58 system.GPU().TickWork();
58 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { 59 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
59 renderer.Rasterizer().FlushRegion(flush->addr, flush->size); 60 rasterizer->FlushRegion(flush->addr, flush->size);
60 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { 61 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
61 renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); 62 rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
62 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 63 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
63 return; 64 return;
64 } else { 65 } else {
@@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() {
84void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 85void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
85 Core::Frontend::GraphicsContext& context, 86 Core::Frontend::GraphicsContext& context,
86 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { 87 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
88 rasterizer = renderer.ReadRasterizer();
87 thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), 89 thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
88 std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); 90 std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
89} 91}
@@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
129} 131}
130 132
131void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 133void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
132 system.Renderer().Rasterizer().OnCPUWrite(addr, size); 134 rasterizer->OnCPUWrite(addr, size);
133} 135}
134 136
135void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 137void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
136 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 138 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
137 system.Renderer().Rasterizer().OnCPUWrite(addr, size); 139 rasterizer->OnCPUWrite(addr, size);
138} 140}
139 141
140void ThreadManager::WaitIdle() const { 142void ThreadManager::WaitIdle() const {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2775629e7..4cd951169 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -27,6 +27,7 @@ class System;
27} // namespace Core 27} // namespace Core
28 28
29namespace VideoCore { 29namespace VideoCore {
30class RasterizerInterface;
30class RendererBase; 31class RendererBase;
31} // namespace VideoCore 32} // namespace VideoCore
32 33
@@ -151,11 +152,12 @@ private:
151 /// Pushes a command to be executed by the GPU thread 152 /// Pushes a command to be executed by the GPU thread
152 u64 PushCommand(CommandData&& command_data); 153 u64 PushCommand(CommandData&& command_data);
153 154
154 SynchState state;
155 Core::System& system; 155 Core::System& system;
156 std::thread thread;
157 std::thread::id thread_id;
158 const bool is_async; 156 const bool is_async;
157 VideoCore::RasterizerInterface* rasterizer = nullptr;
158
159 SynchState state;
160 std::thread thread;
159}; 161};
160 162
161} // namespace VideoCommon::GPUThread 163} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 28f2b8614..970120acc 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -12,7 +12,6 @@ set(SHADER_FILES
12 vulkan_blit_depth_stencil.frag 12 vulkan_blit_depth_stencil.frag
13 vulkan_present.frag 13 vulkan_present.frag
14 vulkan_present.vert 14 vulkan_present.vert
15 vulkan_quad_array.comp
16 vulkan_quad_indexed.comp 15 vulkan_quad_indexed.comp
17 vulkan_uint8.comp 16 vulkan_uint8.comp
18) 17)
diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp
deleted file mode 100644
index 212f4e998..000000000
--- a/src/video_core/host_shaders/vulkan_quad_array.comp
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 460 core
6
7layout (local_size_x = 1024) in;
8
9layout (std430, set = 0, binding = 0) buffer OutputBuffer {
10 uint output_indexes[];
11};
12
13layout (push_constant) uniform PushConstants {
14 uint first;
15};
16
17void main() {
18 uint primitive = gl_GlobalInvocationID.x;
19 if (primitive * 6 >= output_indexes.length()) {
20 return;
21 }
22
23 const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
24 for (uint vertex = 0; vertex < 6; ++vertex) {
25 uint index = first + primitive * 4 + quad_map[vertex];
26 output_indexes[primitive * 6 + vertex] = index;
27 }
28}
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index ad74d7af9..872291670 100644
--- a/src/video_core/host_shaders/vulkan_uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
16 uint16_t output_indexes[]; 16 uint16_t output_indexes[];
17}; 17};
18 18
19uint AssembleIndex(uint id) {
20 // Most primitive restart indices are 0xFF
21 // Hardcode this to 0xFF for now
22 uint index = uint(input_indexes[id]);
23 return index == 0xFF ? 0xFFFF : index;
24}
25
19void main() { 26void main() {
20 uint id = gl_GlobalInvocationID.x; 27 uint id = gl_GlobalInvocationID.x;
21 if (id < input_indexes.length()) { 28 if (id < input_indexes.length()) {
22 output_indexes[id] = uint16_t(input_indexes[id]); 29 output_indexes[id] = uint16_t(AssembleIndex(id));
23 } 30 }
24} 31}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index c841f3cd7..44240a9c4 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)
21 21
22MemoryManager::~MemoryManager() = default; 22MemoryManager::~MemoryManager() = default;
23 23
24void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 24void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
25 rasterizer = &rasterizer_; 25 rasterizer = rasterizer_;
26} 26}
27 27
28GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { 28GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b468a67de..b3538d503 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -72,7 +72,7 @@ public:
72 ~MemoryManager(); 72 ~MemoryManager();
73 73
74 /// Binds a renderer to the memory manager. 74 /// Binds a renderer to the memory manager.
75 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 75 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
76 76
77 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; 77 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
78 78
@@ -157,6 +157,8 @@ private:
157 157
158 using MapRange = std::pair<GPUVAddr, size_t>; 158 using MapRange = std::pair<GPUVAddr, size_t>;
159 std::vector<MapRange> map_ranges; 159 std::vector<MapRange> map_ranges;
160
161 std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
160}; 162};
161 163
162} // namespace Tegra 164} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0cb0f387d..50491b758 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -7,6 +7,7 @@
7#include <atomic> 7#include <atomic>
8#include <functional> 8#include <functional>
9#include <optional> 9#include <optional>
10#include <span>
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "video_core/engines/fermi_2d.h" 12#include "video_core/engines/fermi_2d.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
@@ -49,6 +50,10 @@ public:
49 /// Records a GPU query and caches it 50 /// Records a GPU query and caches it
50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; 51 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
51 52
53 /// Signal an uniform buffer binding
54 virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
55 u32 size) = 0;
56
52 /// Signal a GPU based semaphore as a fence 57 /// Signal a GPU based semaphore as a fence
53 virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; 58 virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
54 59
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 51dde8eb5..320ee8d30 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -37,15 +37,11 @@ public:
37 std::unique_ptr<Core::Frontend::GraphicsContext> context); 37 std::unique_ptr<Core::Frontend::GraphicsContext> context);
38 virtual ~RendererBase(); 38 virtual ~RendererBase();
39 39
40 /// Initialize the renderer
41 [[nodiscard]] virtual bool Init() = 0;
42
43 /// Shutdown the renderer
44 virtual void ShutDown() = 0;
45
46 /// Finalize rendering the guest frame and draw into the presentation texture 40 /// Finalize rendering the guest frame and draw into the presentation texture
47 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 41 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
48 42
43 [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
44
49 // Getter/setter functions: 45 // Getter/setter functions:
50 // ------------------------ 46 // ------------------------
51 47
@@ -57,14 +53,6 @@ public:
57 return m_current_frame; 53 return m_current_frame;
58 } 54 }
59 55
60 [[nodiscard]] RasterizerInterface& Rasterizer() {
61 return *rasterizer;
62 }
63
64 [[nodiscard]] const RasterizerInterface& Rasterizer() const {
65 return *rasterizer;
66 }
67
68 [[nodiscard]] Core::Frontend::GraphicsContext& Context() { 56 [[nodiscard]] Core::Frontend::GraphicsContext& Context() {
69 return *context; 57 return *context;
70 } 58 }
@@ -98,7 +86,6 @@ public:
98 86
99protected: 87protected:
100 Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. 88 Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
101 std::unique_ptr<RasterizerInterface> rasterizer;
102 std::unique_ptr<Core::Frontend::GraphicsContext> context; 89 std::unique_ptr<Core::Frontend::GraphicsContext> context;
103 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer 90 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
104 int m_current_frame = 0; ///< Current frame, should be set by the renderer 91 int m_current_frame = 0; ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5772cad87..6da3906a4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <span>
6 6
7#include <glad/glad.h>
8
9#include "common/assert.h"
10#include "common/microprofile.h"
11#include "video_core/buffer_cache/buffer_cache.h" 7#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_opengl/gl_buffer_cache.h" 8#include "video_core/renderer_opengl/gl_buffer_cache.h"
15#include "video_core/renderer_opengl/gl_device.h" 9#include "video_core/renderer_opengl/gl_device.h"
16#include "video_core/renderer_opengl/gl_rasterizer.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h"
18 10
19namespace OpenGL { 11namespace OpenGL {
12namespace {
13struct BindlessSSBO {
14 GLuint64EXT address;
15 GLsizei length;
16 GLsizei padding;
17};
18static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
19
20constexpr std::array PROGRAM_LUT{
21 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
22 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
23};
24} // Anonymous namespace
25
26Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
27 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
28
29Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
30 VAddr cpu_addr_, u64 size_bytes_)
31 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
32 buffer.Create();
33 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
34 glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
35 glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
36
37 if (runtime.has_unified_vertex_buffers) {
38 glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
39 }
40}
20 41
21using Maxwell = Tegra::Engines::Maxwell3D::Regs; 42void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
43 glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
44 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
45}
22 46
23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 47void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
48 glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
49 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
50}
24 51
25Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) 52void Buffer::MakeResident(GLenum access) noexcept {
26 : BufferBlock{cpu_addr_, size_} { 53 // Abuse GLenum's order to exit early
27 gl_buffer.Create(); 54 // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW); 55 if (access <= current_residency_access || buffer.handle == 0) {
29 if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { 56 return;
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); 57 }
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); 58 if (std::exchange(current_residency_access, access) != GL_NONE) {
59 // If the buffer is already resident, remove its residency before promoting it
60 glMakeNamedBufferNonResidentNV(buffer.handle);
32 } 61 }
62 glMakeNamedBufferResidentNV(buffer.handle, access);
33} 63}
34 64
35Buffer::~Buffer() = default; 65BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
36 66 : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
37void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { 67 use_assembly_shaders{device.UseAssemblyShaders()},
38 glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), 68 has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
39 static_cast<GLsizeiptr>(data_size), data); 69 stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
70 GLint gl_max_attributes;
71 glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
72 max_attributes = static_cast<u32>(gl_max_attributes);
73 for (auto& stage_uniforms : fast_uniforms) {
74 for (OGLBuffer& buffer : stage_uniforms) {
75 buffer.Create();
76 glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
77 }
78 }
79 for (auto& stage_uniforms : copy_uniforms) {
80 for (OGLBuffer& buffer : stage_uniforms) {
81 buffer.Create();
82 glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
83 }
84 }
85 for (OGLBuffer& buffer : copy_compute_uniforms) {
86 buffer.Create();
87 glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
88 }
40} 89}
41 90
42void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { 91void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
43 MICROPROFILE_SCOPE(OpenGL_Buffer_Download); 92 std::span<const VideoCommon::BufferCopy> copies) {
44 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size); 93 for (const VideoCommon::BufferCopy& copy : copies) {
45 const GLintptr gl_offset = static_cast<GLintptr>(offset); 94 glCopyNamedBufferSubData(
46 if (read_buffer.handle == 0) { 95 src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
47 read_buffer.Create(); 96 static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
48 glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
49 GL_STREAM_READ);
50 } 97 }
51 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
52 glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
53 glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
54} 98}
55 99
56void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 100void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
57 std::size_t copy_size) { 101 if (has_unified_vertex_buffers) {
58 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), 102 buffer.MakeResident(GL_READ_ONLY);
59 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); 103 glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
104 static_cast<GLsizeiptr>(size));
105 } else {
106 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
107 index_buffer_offset = offset;
108 }
60} 109}
61 110
62OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, 111void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
63 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 112 u32 stride) {
64 const Device& device_, OGLStreamBuffer& stream_buffer_, 113 if (index >= max_attributes) {
65 StateTracker& state_tracker)
66 : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
67 if (!device.HasFastBufferSubData()) {
68 return; 114 return;
69 } 115 }
70 116 if (has_unified_vertex_buffers) {
71 static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); 117 buffer.MakeResident(GL_READ_ONLY);
72 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 118 glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
73 for (const GLuint cbuf : cbufs) { 119 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
74 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); 120 buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
121 } else {
122 glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
123 static_cast<GLsizei>(stride));
75 } 124 }
76} 125}
77 126
78OGLBufferCache::~OGLBufferCache() { 127void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
79 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 128 u32 offset, u32 size) {
129 if (use_assembly_shaders) {
130 GLuint handle;
131 if (offset != 0) {
132 handle = copy_uniforms[stage][binding_index].handle;
133 glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
134 } else {
135 handle = buffer.Handle();
136 }
137 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
138 static_cast<GLsizeiptr>(size));
139 } else {
140 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
141 const GLuint binding = base_binding + binding_index;
142 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
143 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
144 }
80} 145}
81 146
82std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 147void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
83 return std::make_shared<Buffer>(device, cpu_addr, size); 148 u32 size) {
149 if (use_assembly_shaders) {
150 GLuint handle;
151 if (offset != 0) {
152 handle = copy_compute_uniforms[binding_index].handle;
153 glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
154 } else {
155 handle = buffer.Handle();
156 }
157 glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
158 static_cast<GLsizeiptr>(size));
159 } else {
160 glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
161 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
162 }
84} 163}
85 164
86OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { 165void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
87 return {0, 0, 0}; 166 u32 offset, u32 size, bool is_written) {
167 if (use_assembly_shaders) {
168 const BindlessSSBO ssbo{
169 .address = buffer.HostGpuAddr() + offset,
170 .length = static_cast<GLsizei>(size),
171 .padding = 0,
172 };
173 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
174 glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
175 reinterpret_cast<const GLuint*>(&ssbo));
176 } else {
177 const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
178 const GLuint binding = base_binding + binding_index;
179 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
180 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
181 }
88} 182}
89 183
90OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, 184void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
91 std::size_t size) { 185 u32 size, bool is_written) {
92 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); 186 if (use_assembly_shaders) {
93 const GLuint cbuf = cbufs[cbuf_cursor++]; 187 const BindlessSSBO ssbo{
188 .address = buffer.HostGpuAddr() + offset,
189 .length = static_cast<GLsizei>(size),
190 .padding = 0,
191 };
192 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
193 glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
194 reinterpret_cast<const GLuint*>(&ssbo));
195 } else if (size == 0) {
196 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
197 } else {
198 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
199 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
200 }
201}
94 202
95 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); 203void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
96 return {cbuf, 0, 0}; 204 u32 size) {
205 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
206 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
97} 207}
98 208
99} // namespace OpenGL 209} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 17ee90316..d8b20a9af 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,79 +5,157 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <span>
9 9
10#include "common/alignment.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/dynamic_library.h"
11#include "video_core/buffer_cache/buffer_cache.h" 13#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h" 14#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 17#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 18
16namespace Core {
17class System;
18}
19
20namespace OpenGL { 19namespace OpenGL {
21 20
22class Device; 21class BufferCacheRuntime;
23class OGLStreamBuffer;
24class RasterizerOpenGL;
25class StateTracker;
26 22
27class Buffer : public VideoCommon::BufferBlock { 23class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
28public: 24public:
29 explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); 25 explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
30 ~Buffer(); 26 u64 size_bytes);
27 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
31 28
32 void Upload(std::size_t offset, std::size_t data_size, const u8* data); 29 void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
33 30
34 void Download(std::size_t offset, std::size_t data_size, u8* data); 31 void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
35 32
36 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 33 void MakeResident(GLenum access) noexcept;
37 std::size_t copy_size);
38 34
39 GLuint Handle() const noexcept { 35 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
40 return gl_buffer.handle; 36 return address;
41 } 37 }
42 38
43 u64 Address() const noexcept { 39 [[nodiscard]] GLuint Handle() const noexcept {
44 return gpu_address; 40 return buffer.handle;
45 } 41 }
46 42
47private: 43private:
48 OGLBuffer gl_buffer; 44 GLuint64EXT address = 0;
49 OGLBuffer read_buffer; 45 OGLBuffer buffer;
50 u64 gpu_address = 0; 46 GLenum current_residency_access = GL_NONE;
51}; 47};
52 48
53using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; 49class BufferCacheRuntime {
54class OGLBufferCache final : public GenericBufferCache { 50 friend Buffer;
51
55public: 52public:
56 explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, 53 static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
57 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 54
58 const Device& device, OGLStreamBuffer& stream_buffer, 55 explicit BufferCacheRuntime(const Device& device_);
59 StateTracker& state_tracker); 56
60 ~OGLBufferCache(); 57 void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
58 std::span<const VideoCommon::BufferCopy> copies);
59
60 void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
61
62 void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
63
64 void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
65
66 void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
67
68 void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
69 bool is_written);
70
71 void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
72 bool is_written);
73
74 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
75
76 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
77 if (use_assembly_shaders) {
78 const GLuint handle = fast_uniforms[stage][binding_index].handle;
79 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
80 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
81 } else {
82 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
83 const GLuint binding = base_binding + binding_index;
84 glBindBufferRange(GL_UNIFORM_BUFFER, binding,
85 fast_uniforms[stage][binding_index].handle, 0,
86 static_cast<GLsizeiptr>(size));
87 }
88 }
61 89
62 BufferInfo GetEmptyBuffer(std::size_t) override; 90 void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
91 if (use_assembly_shaders) {
92 glProgramBufferParametersIuivNV(
93 PABO_LUT[stage], binding_index, 0,
94 static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
95 reinterpret_cast<const GLuint*>(data.data()));
96 } else {
97 glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
98 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
99 }
100 }
63 101
64 void Acquire() noexcept { 102 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
65 cbuf_cursor = 0; 103 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
104 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
105 const GLuint binding = base_binding + binding_index;
106 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
107 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
108 return mapped_span;
66 } 109 }
67 110
68protected: 111 [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
69 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 112 return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
113 }
70 114
71 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; 115 [[nodiscard]] bool HasFastBufferSubData() const noexcept {
116 return has_fast_buffer_sub_data;
117 }
72 118
73private: 119private:
74 static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * 120 static constexpr std::array PABO_LUT{
75 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; 121 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
122 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
123 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
124 };
76 125
77 const Device& device; 126 const Device& device;
78 127
79 std::size_t cbuf_cursor = 0; 128 bool has_fast_buffer_sub_data = false;
80 std::array<GLuint, NUM_CBUFS> cbufs{}; 129 bool use_assembly_shaders = false;
130 bool has_unified_vertex_buffers = false;
131
132 u32 max_attributes = 0;
133
134 std::optional<StreamBuffer> stream_buffer;
135
136 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
137 VideoCommon::NUM_STAGES>
138 fast_uniforms;
139 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
140 VideoCommon::NUM_STAGES>
141 copy_uniforms;
142 std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
143
144 u32 index_buffer_offset = 0;
145};
146
147struct BufferCacheParams {
148 using Runtime = OpenGL::BufferCacheRuntime;
149 using Buffer = OpenGL::Buffer;
150
151 static constexpr bool IS_OPENGL = true;
152 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
153 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
154 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
155 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
156 static constexpr bool USE_MEMORY_MAPS = false;
81}; 157};
82 158
159using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
160
83} // namespace OpenGL 161} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 04c267ee4..48d5c4a5e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,7 @@
21#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
22 22
23namespace OpenGL { 23namespace OpenGL {
24
25namespace { 24namespace {
26
27// One uniform block is reserved for emulation purposes 25// One uniform block is reserved for emulation purposes
28constexpr u32 ReservedUniformBlocks = 1; 26constexpr u32 ReservedUniformBlocks = 1;
29 27
@@ -197,11 +195,13 @@ bool IsASTCSupported() {
197 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 195 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
198 return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); 196 return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
199} 197}
200
201} // Anonymous namespace 198} // Anonymous namespace
202 199
203Device::Device() 200Device::Device() {
204 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { 201 if (!GLAD_GL_VERSION_4_6) {
202 LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
203 throw std::runtime_error{"Insufficient version"};
204 }
205 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 205 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
206 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); 206 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
207 const std::vector extensions = GetExtensions(); 207 const std::vector extensions = GetExtensions();
@@ -217,6 +217,9 @@ Device::Device()
217 "Beta driver 443.24 is known to have issues. There might be performance issues."); 217 "Beta driver 443.24 is known to have issues. There might be performance issues.");
218 disable_fast_buffer_sub_data = true; 218 disable_fast_buffer_sub_data = true;
219 } 219 }
220
221 max_uniform_buffers = BuildMaxUniformBuffers();
222 base_bindings = BuildBaseBindings();
220 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 223 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
221 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 224 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
222 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 225 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9141de635..ee053776d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@
10 10
11namespace OpenGL { 11namespace OpenGL {
12 12
13static constexpr u32 EmulationUniformBlockBinding = 0; 13class Device {
14
15class Device final {
16public: 14public:
17 struct BaseBindings final { 15 struct BaseBindings {
18 u32 uniform_buffer{}; 16 u32 uniform_buffer{};
19 u32 shader_storage_buffer{}; 17 u32 shader_storage_buffer{};
20 u32 sampler{}; 18 u32 sampler{};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 3e9c922f5..151290101 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
47 47
48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, 48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
49 Tegra::GPU& gpu_, TextureCache& texture_cache_, 49 Tegra::GPU& gpu_, TextureCache& texture_cache_,
50 OGLBufferCache& buffer_cache_, QueryCache& query_cache_) 50 BufferCache& buffer_cache_, QueryCache& query_cache_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
52 52
53Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { 53Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 30dbee613..e714aa115 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -32,14 +32,13 @@ private:
32}; 32};
33 33
34using Fence = std::shared_ptr<GLInnerFence>; 34using Fence = std::shared_ptr<GLInnerFence>;
35using GenericFenceManager = 35using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
36 VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
37 36
38class FenceManagerOpenGL final : public GenericFenceManager { 37class FenceManagerOpenGL final : public GenericFenceManager {
39public: 38public:
40 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 39 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
41 TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, 40 TextureCache& texture_cache, BufferCache& buffer_cache,
42 QueryCache& query_cache_); 41 QueryCache& query_cache);
43 42
44protected: 43protected:
45 Fence CreateFence(u32 value, bool is_stubbed) override; 44 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ea4ca9a82..418644108 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -38,34 +38,21 @@
38namespace OpenGL { 38namespace OpenGL {
39 39
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 40using Maxwell = Tegra::Engines::Maxwell3D::Regs;
41using GLvec4 = std::array<GLfloat, 4>;
41 42
42using Tegra::Engines::ShaderType; 43using Tegra::Engines::ShaderType;
43using VideoCore::Surface::PixelFormat; 44using VideoCore::Surface::PixelFormat;
44using VideoCore::Surface::SurfaceTarget; 45using VideoCore::Surface::SurfaceTarget;
45using VideoCore::Surface::SurfaceType; 46using VideoCore::Surface::SurfaceType;
46 47
47MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
48MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
49MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
50MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
51MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
52MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
53MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
54MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); 48MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
49MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
55MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); 50MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
56MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); 51MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
57MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
58 52
59namespace { 53namespace {
60 54
61constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
62constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
63 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
64constexpr size_t TOTAL_CONST_BUFFER_BYTES =
65 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
66
67constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 55constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
68constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
69 56
70struct TextureHandle { 57struct TextureHandle {
71 constexpr TextureHandle(u32 data, bool via_header_index) { 58 constexpr TextureHandle(u32 data, bool via_header_index) {
@@ -101,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
101 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); 88 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
102} 89}
103 90
104std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
105 const ConstBufferEntry& entry) {
106 if (!entry.IsIndirect()) {
107 return entry.GetSize();
108 }
109 if (buffer.size > Maxwell::MaxConstBufferSize) {
110 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
111 Maxwell::MaxConstBufferSize);
112 return Maxwell::MaxConstBufferSize;
113 }
114
115 return buffer.size;
116}
117
118/// Translates hardware transform feedback indices 91/// Translates hardware transform feedback indices
119/// @param location Hardware location 92/// @param location Hardware location
120/// @return Pair of ARB_transform_feedback3 token stream first and third arguments 93/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -147,14 +120,6 @@ void oglEnable(GLenum cap, bool state) {
147 (state ? glEnable : glDisable)(cap); 120 (state ? glEnable : glDisable)(cap);
148} 121}
149 122
150void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
151 if (num_ssbos == 0) {
152 return;
153 }
154 glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
155 reinterpret_cast<const GLuint*>(ssbos));
156}
157
158ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { 123ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
159 if (entry.is_buffer) { 124 if (entry.is_buffer) {
160 return ImageViewType::Buffer; 125 return ImageViewType::Buffer;
@@ -201,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
201 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), 166 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
202 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 167 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
203 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), 168 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
204 stream_buffer(device, state_tracker),
205 texture_cache_runtime(device, program_manager, state_tracker), 169 texture_cache_runtime(device, program_manager, state_tracker),
206 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 170 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
171 buffer_cache_runtime(device),
172 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
207 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 173 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
208 query_cache(*this, maxwell3d, gpu_memory), 174 query_cache(*this, maxwell3d, gpu_memory),
209 buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
210 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 175 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
211 async_shaders(emu_window_) { 176 async_shaders(emu_window_) {
212 unified_uniform_buffer.Create();
213 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
214
215 if (device.UseAssemblyShaders()) {
216 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
217 for (const GLuint cbuf : staging_cbufs) {
218 glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
219 nullptr, 0);
220 }
221 }
222 if (device.UseAsynchronousShaders()) { 177 if (device.UseAsynchronousShaders()) {
223 async_shaders.AllocateWorkers(); 178 async_shaders.AllocateWorkers();
224 } 179 }
225} 180}
226 181
227RasterizerOpenGL::~RasterizerOpenGL() { 182RasterizerOpenGL::~RasterizerOpenGL() = default;
228 if (device.UseAssemblyShaders()) {
229 glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
230 }
231}
232 183
233void RasterizerOpenGL::SetupVertexFormat() { 184void RasterizerOpenGL::SyncVertexFormats() {
234 auto& flags = maxwell3d.dirty.flags; 185 auto& flags = maxwell3d.dirty.flags;
235 if (!flags[Dirty::VertexFormats]) { 186 if (!flags[Dirty::VertexFormats]) {
236 return; 187 return;
237 } 188 }
238 flags[Dirty::VertexFormats] = false; 189 flags[Dirty::VertexFormats] = false;
239 190
240 MICROPROFILE_SCOPE(OpenGL_VAO);
241
242 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables 191 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
243 // the first 16 vertex attributes always, as we don't know which ones are actually used until 192 // the first 16 vertex attributes always, as we don't know which ones are actually used until
244 // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to 193 // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -274,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
274 } 223 }
275} 224}
276 225
277void RasterizerOpenGL::SetupVertexBuffer() { 226void RasterizerOpenGL::SyncVertexInstances() {
278 auto& flags = maxwell3d.dirty.flags;
279 if (!flags[Dirty::VertexBuffers]) {
280 return;
281 }
282 flags[Dirty::VertexBuffers] = false;
283
284 MICROPROFILE_SCOPE(OpenGL_VB);
285
286 const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
287
288 // Upload all guest vertex arrays sequentially to our buffer
289 const auto& regs = maxwell3d.regs;
290 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
291 if (!flags[Dirty::VertexBuffer0 + index]) {
292 continue;
293 }
294 flags[Dirty::VertexBuffer0 + index] = false;
295
296 const auto& vertex_array = regs.vertex_array[index];
297 if (!vertex_array.IsEnabled()) {
298 continue;
299 }
300
301 const GPUVAddr start = vertex_array.StartAddress();
302 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
303 ASSERT(end >= start);
304
305 const GLuint gl_index = static_cast<GLuint>(index);
306 const u64 size = end - start;
307 if (size == 0) {
308 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
309 if (use_unified_memory) {
310 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
311 }
312 continue;
313 }
314 const auto info = buffer_cache.UploadMemory(start, size);
315 if (use_unified_memory) {
316 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
317 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
318 info.address + info.offset, size);
319 } else {
320 glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
321 }
322 }
323}
324
325void RasterizerOpenGL::SetupVertexInstances() {
326 auto& flags = maxwell3d.dirty.flags; 227 auto& flags = maxwell3d.dirty.flags;
327 if (!flags[Dirty::VertexInstances]) { 228 if (!flags[Dirty::VertexInstances]) {
328 return; 229 return;
@@ -343,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
343 } 244 }
344} 245}
345 246
346GLintptr RasterizerOpenGL::SetupIndexBuffer() { 247void RasterizerOpenGL::SetupShaders(bool is_indexed) {
347 MICROPROFILE_SCOPE(OpenGL_Index);
348 const auto& regs = maxwell3d.regs;
349 const std::size_t size = CalculateIndexBufferSize();
350 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
351 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
352 return info.offset;
353}
354
355void RasterizerOpenGL::SetupShaders() {
356 MICROPROFILE_SCOPE(OpenGL_Shader);
357 u32 clip_distances = 0; 248 u32 clip_distances = 0;
358 249
359 std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; 250 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -410,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() {
410 const size_t stage = index == 0 ? 0 : index - 1; 301 const size_t stage = index == 0 ? 0 : index - 1;
411 shaders[stage] = shader; 302 shaders[stage] = shader;
412 303
413 SetupDrawConstBuffers(stage, shader);
414 SetupDrawGlobalMemory(stage, shader);
415 SetupDrawTextures(shader, stage); 304 SetupDrawTextures(shader, stage);
416 SetupDrawImages(shader, stage); 305 SetupDrawImages(shader, stage);
417 306
307 buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
308
309 buffer_cache.UnbindGraphicsStorageBuffers(stage);
310 u32 ssbo_index = 0;
311 for (const auto& buffer : shader->GetEntries().global_memory_entries) {
312 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
313 buffer.cbuf_offset, buffer.is_written);
314 ++ssbo_index;
315 }
316
418 // Workaround for Intel drivers. 317 // Workaround for Intel drivers.
419 // When a clip distance is enabled but not set in the shader it crops parts of the screen 318 // When a clip distance is enabled but not set in the shader it crops parts of the screen
420 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 319 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -430,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() {
430 SyncClipEnabled(clip_distances); 329 SyncClipEnabled(clip_distances);
431 maxwell3d.dirty.flags[Dirty::Shaders] = false; 330 maxwell3d.dirty.flags[Dirty::Shaders] = false;
432 331
332 buffer_cache.UpdateGraphicsBuffers(is_indexed);
333
433 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 334 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
434 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); 335 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
435 336
337 buffer_cache.BindHostGeometryBuffers(is_indexed);
338
436 size_t image_view_index = 0; 339 size_t image_view_index = 0;
437 size_t texture_index = 0; 340 size_t texture_index = 0;
438 size_t image_index = 0; 341 size_t image_index = 0;
439 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 342 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
440 const Shader* const shader = shaders[stage]; 343 const Shader* const shader = shaders[stage];
441 if (shader) { 344 if (!shader) {
442 const auto base = device.GetBaseBindings(stage);
443 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
444 texture_index, image_index);
445 }
446 }
447}
448
449std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
450 const auto& regs = maxwell3d.regs;
451
452 std::size_t size = 0;
453 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
454 if (!regs.vertex_array[index].IsEnabled())
455 continue; 345 continue;
456 346 }
457 const GPUVAddr start = regs.vertex_array[index].StartAddress(); 347 buffer_cache.BindHostStageBuffers(stage);
458 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 348 const auto& base = device.GetBaseBindings(stage);
459 349 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
460 size += end - start; 350 texture_index, image_index);
461 ASSERT(end >= start);
462 } 351 }
463
464 return size;
465}
466
467std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
468 return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
469 static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
470} 352}
471 353
472void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, 354void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -475,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
475} 357}
476 358
477void RasterizerOpenGL::Clear() { 359void RasterizerOpenGL::Clear() {
360 MICROPROFILE_SCOPE(OpenGL_Clears);
478 if (!maxwell3d.ShouldExecute()) { 361 if (!maxwell3d.ShouldExecute()) {
479 return; 362 return;
480 } 363 }
@@ -525,11 +408,9 @@ void RasterizerOpenGL::Clear() {
525 } 408 }
526 UNIMPLEMENTED_IF(regs.clear_flags.viewport); 409 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
527 410
528 { 411 std::scoped_lock lock{texture_cache.mutex};
529 auto lock = texture_cache.AcquireLock(); 412 texture_cache.UpdateRenderTargets(true);
530 texture_cache.UpdateRenderTargets(true); 413 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
531 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
532 }
533 414
534 if (use_color) { 415 if (use_color) {
535 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); 416 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -541,7 +422,6 @@ void RasterizerOpenGL::Clear() {
541 } else if (use_stencil) { 422 } else if (use_stencil) {
542 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 423 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
543 } 424 }
544
545 ++num_queued_commands; 425 ++num_queued_commands;
546} 426}
547 427
@@ -550,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
550 430
551 query_cache.UpdateCounters(); 431 query_cache.UpdateCounters();
552 432
553 SyncViewport(); 433 SyncState();
554 SyncRasterizeEnable();
555 SyncPolygonModes();
556 SyncColorMask();
557 SyncFragmentColorClampState();
558 SyncMultiSampleState();
559 SyncDepthTestState();
560 SyncDepthClamp();
561 SyncStencilTestState();
562 SyncBlendState();
563 SyncLogicOpState();
564 SyncCullMode();
565 SyncPrimitiveRestart();
566 SyncScissorTest();
567 SyncPointState();
568 SyncLineState();
569 SyncPolygonOffset();
570 SyncAlphaTest();
571 SyncFramebufferSRGB();
572
573 buffer_cache.Acquire();
574 current_cbuf = 0;
575
576 std::size_t buffer_size = CalculateVertexArraysSize();
577
578 // Add space for index buffer
579 if (is_indexed) {
580 buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
581 }
582
583 // Uniform space for the 5 shader stages
584 buffer_size =
585 Common::AlignUp<std::size_t>(buffer_size, 4) +
586 (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
587
588 // Add space for at least 18 constant buffers
589 buffer_size += Maxwell::MaxConstBuffers *
590 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
591
592 // Prepare the vertex array.
593 buffer_cache.Map(buffer_size);
594
595 // Prepare vertex array format.
596 SetupVertexFormat();
597
598 // Upload vertex and index data.
599 SetupVertexBuffer();
600 SetupVertexInstances();
601 GLintptr index_buffer_offset = 0;
602 if (is_indexed) {
603 index_buffer_offset = SetupIndexBuffer();
604 }
605
606 // Setup emulation uniform buffer.
607 if (!device.UseAssemblyShaders()) {
608 MaxwellUniformData ubo;
609 ubo.SetFromRegs(maxwell3d);
610 const auto info =
611 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
612 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
613 static_cast<GLsizeiptr>(sizeof(ubo)));
614 }
615 434
616 // Setup shaders and their used resources. 435 // Setup shaders and their used resources.
617 auto lock = texture_cache.AcquireLock(); 436 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
618 SetupShaders(); 437 SetupShaders(is_indexed);
619 438
620 // Signal the buffer cache that we are not going to upload more things.
621 buffer_cache.Unmap();
622 texture_cache.UpdateRenderTargets(false); 439 texture_cache.UpdateRenderTargets(false);
623 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); 440 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
624 program_manager.BindGraphicsPipeline(); 441 program_manager.BindGraphicsPipeline();
@@ -632,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
632 if (is_indexed) { 449 if (is_indexed) {
633 const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); 450 const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
634 const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); 451 const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
635 const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); 452 const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
636 const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); 453 const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
637 if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { 454 if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
638 glDrawElements(primitive_mode, num_vertices, format, offset); 455 glDrawElements(primitive_mode, num_vertices, format, offset);
@@ -672,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
672} 489}
673 490
674void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 491void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
675 buffer_cache.Acquire();
676 current_cbuf = 0;
677
678 Shader* const kernel = shader_cache.GetComputeKernel(code_addr); 492 Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
679 493
680 auto lock = texture_cache.AcquireLock(); 494 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
681 BindComputeTextures(kernel); 495 BindComputeTextures(kernel);
682 496
683 const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * 497 const auto& entries = kernel->GetEntries();
684 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 498 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
685 buffer_cache.Map(buffer_size); 499 buffer_cache.UnbindComputeStorageBuffers();
686 500 u32 ssbo_index = 0;
687 SetupComputeConstBuffers(kernel); 501 for (const auto& buffer : entries.global_memory_entries) {
688 SetupComputeGlobalMemory(kernel); 502 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
689 503 buffer.is_written);
690 buffer_cache.Unmap(); 504 ++ssbo_index;
505 }
506 buffer_cache.UpdateComputeBuffers();
507 buffer_cache.BindHostComputeBuffers();
691 508
692 const auto& launch_desc = kepler_compute.launch_description; 509 const auto& launch_desc = kepler_compute.launch_description;
693 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 510 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -703,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
703 query_cache.Query(gpu_addr, type, timestamp); 520 query_cache.Query(gpu_addr, type, timestamp);
704} 521}
705 522
523void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
524 u32 size) {
525 std::scoped_lock lock{buffer_cache.mutex};
526 buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
527}
528
706void RasterizerOpenGL::FlushAll() {} 529void RasterizerOpenGL::FlushAll() {}
707 530
708void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 531void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -711,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
711 return; 534 return;
712 } 535 }
713 { 536 {
714 auto lock = texture_cache.AcquireLock(); 537 std::scoped_lock lock{texture_cache.mutex};
715 texture_cache.DownloadMemory(addr, size); 538 texture_cache.DownloadMemory(addr, size);
716 } 539 }
717 buffer_cache.FlushRegion(addr, size); 540 {
541 std::scoped_lock lock{buffer_cache.mutex};
542 buffer_cache.DownloadMemory(addr, size);
543 }
718 query_cache.FlushRegion(addr, size); 544 query_cache.FlushRegion(addr, size);
719} 545}
720 546
721bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { 547bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
548 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
722 if (!Settings::IsGPULevelHigh()) { 549 if (!Settings::IsGPULevelHigh()) {
723 return buffer_cache.MustFlushRegion(addr, size); 550 return buffer_cache.IsRegionGpuModified(addr, size);
724 } 551 }
725 return texture_cache.IsRegionGpuModified(addr, size) || 552 return texture_cache.IsRegionGpuModified(addr, size) ||
726 buffer_cache.MustFlushRegion(addr, size); 553 buffer_cache.IsRegionGpuModified(addr, size);
727} 554}
728 555
729void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 556void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -732,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
732 return; 559 return;
733 } 560 }
734 { 561 {
735 auto lock = texture_cache.AcquireLock(); 562 std::scoped_lock lock{texture_cache.mutex};
736 texture_cache.WriteMemory(addr, size); 563 texture_cache.WriteMemory(addr, size);
737 } 564 }
565 {
566 std::scoped_lock lock{buffer_cache.mutex};
567 buffer_cache.WriteMemory(addr, size);
568 }
738 shader_cache.InvalidateRegion(addr, size); 569 shader_cache.InvalidateRegion(addr, size);
739 buffer_cache.InvalidateRegion(addr, size);
740 query_cache.InvalidateRegion(addr, size); 570 query_cache.InvalidateRegion(addr, size);
741} 571}
742 572
@@ -745,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
745 if (addr == 0 || size == 0) { 575 if (addr == 0 || size == 0) {
746 return; 576 return;
747 } 577 }
578 shader_cache.OnCPUWrite(addr, size);
748 { 579 {
749 auto lock = texture_cache.AcquireLock(); 580 std::scoped_lock lock{texture_cache.mutex};
750 texture_cache.WriteMemory(addr, size); 581 texture_cache.WriteMemory(addr, size);
751 } 582 }
752 shader_cache.OnCPUWrite(addr, size); 583 {
753 buffer_cache.OnCPUWrite(addr, size); 584 std::scoped_lock lock{buffer_cache.mutex};
585 buffer_cache.CachedWriteMemory(addr, size);
586 }
754} 587}
755 588
756void RasterizerOpenGL::SyncGuestHost() { 589void RasterizerOpenGL::SyncGuestHost() {
757 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 590 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
758 buffer_cache.SyncGuestHost();
759 shader_cache.SyncGuestHost(); 591 shader_cache.SyncGuestHost();
592 {
593 std::scoped_lock lock{buffer_cache.mutex};
594 buffer_cache.FlushCachedWrites();
595 }
760} 596}
761 597
762void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { 598void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
763 { 599 {
764 auto lock = texture_cache.AcquireLock(); 600 std::scoped_lock lock{texture_cache.mutex};
765 texture_cache.UnmapMemory(addr, size); 601 texture_cache.UnmapMemory(addr, size);
766 } 602 }
767 buffer_cache.OnCPUWrite(addr, size); 603 {
604 std::scoped_lock lock{buffer_cache.mutex};
605 buffer_cache.WriteMemory(addr, size);
606 }
768 shader_cache.OnCPUWrite(addr, size); 607 shader_cache.OnCPUWrite(addr, size);
769} 608}
770 609
@@ -799,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
799} 638}
800 639
801void RasterizerOpenGL::WaitForIdle() { 640void RasterizerOpenGL::WaitForIdle() {
802 // Place a barrier on everything that is not framebuffer related. 641 glMemoryBarrier(GL_ALL_BARRIER_BITS);
803 // This is related to another flag that is not currently implemented.
804 glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
805 GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
806 GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
807 GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
808 GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
809 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
810} 642}
811 643
812void RasterizerOpenGL::FragmentBarrier() { 644void RasterizerOpenGL::FragmentBarrier() {
@@ -831,18 +663,21 @@ void RasterizerOpenGL::TickFrame() {
831 num_queued_commands = 0; 663 num_queued_commands = 0;
832 664
833 fence_manager.TickFrame(); 665 fence_manager.TickFrame();
834 buffer_cache.TickFrame();
835 { 666 {
836 auto lock = texture_cache.AcquireLock(); 667 std::scoped_lock lock{texture_cache.mutex};
837 texture_cache.TickFrame(); 668 texture_cache.TickFrame();
838 } 669 }
670 {
671 std::scoped_lock lock{buffer_cache.mutex};
672 buffer_cache.TickFrame();
673 }
839} 674}
840 675
841bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 676bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
842 const Tegra::Engines::Fermi2D::Surface& dst, 677 const Tegra::Engines::Fermi2D::Surface& dst,
843 const Tegra::Engines::Fermi2D::Config& copy_config) { 678 const Tegra::Engines::Fermi2D::Config& copy_config) {
844 MICROPROFILE_SCOPE(OpenGL_Blits); 679 MICROPROFILE_SCOPE(OpenGL_Blits);
845 auto lock = texture_cache.AcquireLock(); 680 std::scoped_lock lock{texture_cache.mutex};
846 texture_cache.BlitImage(dst, src, copy_config); 681 texture_cache.BlitImage(dst, src, copy_config);
847 return true; 682 return true;
848} 683}
@@ -854,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
854 } 689 }
855 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 690 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
856 691
857 auto lock = texture_cache.AcquireLock(); 692 std::scoped_lock lock{texture_cache.mutex};
858 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; 693 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
859 if (!image_view) { 694 if (!image_view) {
860 return false; 695 return false;
@@ -921,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
921 } 756 }
922} 757}
923 758
924void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
925 static constexpr std::array PARAMETER_LUT{
926 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
927 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
928 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
929 };
930 MICROPROFILE_SCOPE(OpenGL_UBO);
931 const auto& stages = maxwell3d.state.shader_stages;
932 const auto& shader_stage = stages[stage_index];
933 const auto& entries = shader->GetEntries();
934 const bool use_unified = entries.use_unified_uniforms;
935 const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
936
937 const auto base_bindings = device.GetBaseBindings(stage_index);
938 u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
939 for (const auto& entry : entries.const_buffers) {
940 const u32 index = entry.GetIndex();
941 const auto& buffer = shader_stage.const_buffers[index];
942 SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
943 base_unified_offset + index * Maxwell::MaxConstBufferSize);
944 ++binding;
945 }
946 if (use_unified) {
947 const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
948 entries.global_memory_entries.size());
949 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
950 base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
951 }
952}
953
954void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
955 MICROPROFILE_SCOPE(OpenGL_UBO);
956 const auto& launch_desc = kepler_compute.launch_description;
957 const auto& entries = kernel->GetEntries();
958 const bool use_unified = entries.use_unified_uniforms;
959
960 u32 binding = 0;
961 for (const auto& entry : entries.const_buffers) {
962 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
963 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
964 Tegra::Engines::ConstBufferInfo buffer;
965 buffer.address = config.Address();
966 buffer.size = config.size;
967 buffer.enabled = mask[entry.GetIndex()];
968 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
969 use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
970 ++binding;
971 }
972 if (use_unified) {
973 const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
974 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
975 NUM_CONST_BUFFERS_BYTES_PER_STAGE);
976 }
977}
978
979void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
980 const Tegra::Engines::ConstBufferInfo& buffer,
981 const ConstBufferEntry& entry, bool use_unified,
982 std::size_t unified_offset) {
983 if (!buffer.enabled) {
984 // Set values to zero to unbind buffers
985 if (device.UseAssemblyShaders()) {
986 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
987 } else {
988 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
989 }
990 return;
991 }
992
993 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
994 // UBO alignment requirements.
995 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
996
997 const bool fast_upload = !use_unified && device.HasFastBufferSubData();
998
999 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
1000 const GPUVAddr gpu_addr = buffer.address;
1001 auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
1002
1003 if (device.UseAssemblyShaders()) {
1004 UNIMPLEMENTED_IF(use_unified);
1005 if (info.offset != 0) {
1006 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
1007 glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
1008 info.handle = staging_cbuf;
1009 info.offset = 0;
1010 }
1011 glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
1012 return;
1013 }
1014
1015 if (use_unified) {
1016 glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
1017 unified_offset, size);
1018 } else {
1019 glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
1020 }
1021}
1022
1023void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
1024 static constexpr std::array TARGET_LUT = {
1025 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1026 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1027 };
1028 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1029 const auto& entries{shader->GetEntries().global_memory_entries};
1030
1031 std::array<BindlessSSBO, 32> ssbos;
1032 ASSERT(entries.size() < ssbos.size());
1033
1034 const bool assembly_shaders = device.UseAssemblyShaders();
1035 u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
1036 for (const auto& entry : entries) {
1037 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
1038 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1039 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1040 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1041 ++binding;
1042 }
1043 if (assembly_shaders) {
1044 UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
1045 }
1046}
1047
1048void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1049 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1050 const auto& entries{kernel->GetEntries().global_memory_entries};
1051
1052 std::array<BindlessSSBO, 32> ssbos;
1053 ASSERT(entries.size() < ssbos.size());
1054
1055 u32 binding = 0;
1056 for (const auto& entry : entries) {
1057 const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
1058 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1059 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1060 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1061 ++binding;
1062 }
1063 if (device.UseAssemblyShaders()) {
1064 UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
1065 }
1066}
1067
1068void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1069 GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
1070 const size_t alignment{device.GetShaderStorageBufferAlignment()};
1071 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1072 if (device.UseAssemblyShaders()) {
1073 *ssbo = BindlessSSBO{
1074 .address = static_cast<GLuint64EXT>(info.address + info.offset),
1075 .length = static_cast<GLsizei>(size),
1076 .padding = 0,
1077 };
1078 } else {
1079 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1080 static_cast<GLsizeiptr>(size));
1081 }
1082}
1083
1084void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { 759void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
1085 const bool via_header_index = 760 const bool via_header_index =
1086 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 761 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1128,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
1128 } 803 }
1129} 804}
1130 805
806void RasterizerOpenGL::SyncState() {
807 SyncViewport();
808 SyncRasterizeEnable();
809 SyncPolygonModes();
810 SyncColorMask();
811 SyncFragmentColorClampState();
812 SyncMultiSampleState();
813 SyncDepthTestState();
814 SyncDepthClamp();
815 SyncStencilTestState();
816 SyncBlendState();
817 SyncLogicOpState();
818 SyncCullMode();
819 SyncPrimitiveRestart();
820 SyncScissorTest();
821 SyncPointState();
822 SyncLineState();
823 SyncPolygonOffset();
824 SyncAlphaTest();
825 SyncFramebufferSRGB();
826 SyncVertexFormats();
827 SyncVertexInstances();
828}
829
1131void RasterizerOpenGL::SyncViewport() { 830void RasterizerOpenGL::SyncViewport() {
1132 auto& flags = maxwell3d.dirty.flags; 831 auto& flags = maxwell3d.dirty.flags;
1133 const auto& regs = maxwell3d.regs; 832 const auto& regs = maxwell3d.regs;
@@ -1163,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() {
1163 if (regs.screen_y_control.y_negate != 0) { 862 if (regs.screen_y_control.y_negate != 0) {
1164 flip_y = !flip_y; 863 flip_y = !flip_y;
1165 } 864 }
1166 glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, 865 const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
1167 regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE 866 const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
1168 : GL_NEGATIVE_ONE_TO_ONE); 867 const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
868 state_tracker.ClipControl(origin, depth);
869 state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
1169 } 870 }
1170 871
1171 if (dirty_viewport) { 872 if (dirty_viewport) {
@@ -1649,36 +1350,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1649 if (regs.tfb_enabled == 0) { 1350 if (regs.tfb_enabled == 0) {
1650 return; 1351 return;
1651 } 1352 }
1652
1653 if (device.UseAssemblyShaders()) { 1353 if (device.UseAssemblyShaders()) {
1654 SyncTransformFeedback(); 1354 SyncTransformFeedback();
1655 } 1355 }
1656
1657 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 1356 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1658 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 1357 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1659 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 1358 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
1660 1359 UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
1661 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1662 const auto& binding = regs.tfb_bindings[index];
1663 if (!binding.buffer_enable) {
1664 if (enabled_transform_feedback_buffers[index]) {
1665 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
1666 0);
1667 }
1668 enabled_transform_feedback_buffers[index] = false;
1669 continue;
1670 }
1671 enabled_transform_feedback_buffers[index] = true;
1672
1673 auto& tfb_buffer = transform_feedback_buffers[index];
1674 tfb_buffer.Create();
1675
1676 const GLuint handle = tfb_buffer.handle;
1677 const std::size_t size = binding.buffer_size;
1678 glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
1679 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
1680 static_cast<GLsizeiptr>(size));
1681 }
1682 1360
1683 // We may have to call BeginTransformFeedbackNV here since they seem to call different 1361 // We may have to call BeginTransformFeedbackNV here since they seem to call different
1684 // implementations on Nvidia's driver (the pointer is different) but we are using 1362 // implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1692,23 +1370,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
1692 if (regs.tfb_enabled == 0) { 1370 if (regs.tfb_enabled == 0) {
1693 return; 1371 return;
1694 } 1372 }
1695
1696 glEndTransformFeedback(); 1373 glEndTransformFeedback();
1697
1698 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1699 const auto& binding = regs.tfb_bindings[index];
1700 if (!binding.buffer_enable) {
1701 continue;
1702 }
1703 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
1704
1705 const GLuint handle = transform_feedback_buffers[index].handle;
1706 const GPUVAddr gpu_addr = binding.Address();
1707 const std::size_t size = binding.buffer_size;
1708 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1709 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1710 static_cast<GLsizeiptr>(size));
1711 }
1712} 1374}
1713 1375
1714} // namespace OpenGL 1376} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82e03e677..3745cf637 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,6 @@
30#include "video_core/renderer_opengl/gl_shader_decompiler.h" 30#include "video_core/renderer_opengl/gl_shader_decompiler.h"
31#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
32#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
33#include "video_core/renderer_opengl/gl_stream_buffer.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 33#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/shader/async_shaders.h" 34#include "video_core/shader/async_shaders.h"
36#include "video_core/textures/texture.h" 35#include "video_core/textures/texture.h"
@@ -72,6 +71,7 @@ public:
72 void DispatchCompute(GPUVAddr code_addr) override; 71 void DispatchCompute(GPUVAddr code_addr) override;
73 void ResetCounter(VideoCore::QueryType type) override; 72 void ResetCounter(VideoCore::QueryType type) override;
74 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 73 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
74 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
75 void FlushAll() override; 75 void FlushAll() override;
76 void FlushRegion(VAddr addr, u64 size) override; 76 void FlushRegion(VAddr addr, u64 size) override;
77 bool MustFlushRegion(VAddr addr, u64 size) override; 77 bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -119,27 +119,6 @@ private:
119 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, 119 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
120 size_t& image_view_index, size_t& texture_index, size_t& image_index); 120 size_t& image_view_index, size_t& texture_index, size_t& image_index);
121 121
122 /// Configures the current constbuffers to use for the draw command.
123 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
124
125 /// Configures the current constbuffers to use for the kernel invocation.
126 void SetupComputeConstBuffers(Shader* kernel);
127
128 /// Configures a constant buffer.
129 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
130 const ConstBufferEntry& entry, bool use_unified,
131 std::size_t unified_offset);
132
133 /// Configures the current global memory entries to use for the draw command.
134 void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
135
136 /// Configures the current global memory entries to use for the kernel invocation.
137 void SetupComputeGlobalMemory(Shader* kernel);
138
139 /// Configures a global memory buffer.
140 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
141 size_t size, BindlessSSBO* ssbo);
142
143 /// Configures the current textures to use for the draw command. 122 /// Configures the current textures to use for the draw command.
144 void SetupDrawTextures(const Shader* shader, size_t stage_index); 123 void SetupDrawTextures(const Shader* shader, size_t stage_index);
145 124
@@ -152,6 +131,9 @@ private:
152 /// Configures images in a compute shader. 131 /// Configures images in a compute shader.
153 void SetupComputeImages(const Shader* shader); 132 void SetupComputeImages(const Shader* shader);
154 133
134 /// Syncs state to match guest's
135 void SyncState();
136
155 /// Syncs the viewport and depth range to match the guest state 137 /// Syncs the viewport and depth range to match the guest state
156 void SyncViewport(); 138 void SyncViewport();
157 139
@@ -215,6 +197,12 @@ private:
215 /// Syncs the framebuffer sRGB state to match the guest state 197 /// Syncs the framebuffer sRGB state to match the guest state
216 void SyncFramebufferSRGB(); 198 void SyncFramebufferSRGB();
217 199
200 /// Syncs vertex formats to match the guest state
201 void SyncVertexFormats();
202
203 /// Syncs vertex instances to match the guest state
204 void SyncVertexInstances();
205
218 /// Syncs transform feedback state to match guest state 206 /// Syncs transform feedback state to match guest state
219 /// @note Only valid on assembly shaders 207 /// @note Only valid on assembly shaders
220 void SyncTransformFeedback(); 208 void SyncTransformFeedback();
@@ -225,19 +213,7 @@ private:
225 /// End a transform feedback 213 /// End a transform feedback
226 void EndTransformFeedback(); 214 void EndTransformFeedback();
227 215
228 std::size_t CalculateVertexArraysSize() const; 216 void SetupShaders(bool is_indexed);
229
230 std::size_t CalculateIndexBufferSize() const;
231
232 /// Updates the current vertex format
233 void SetupVertexFormat();
234
235 void SetupVertexBuffer();
236 void SetupVertexInstances();
237
238 GLintptr SetupIndexBuffer();
239
240 void SetupShaders();
241 217
242 Tegra::GPU& gpu; 218 Tegra::GPU& gpu;
243 Tegra::Engines::Maxwell3D& maxwell3d; 219 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +225,12 @@ private:
249 ProgramManager& program_manager; 225 ProgramManager& program_manager;
250 StateTracker& state_tracker; 226 StateTracker& state_tracker;
251 227
252 OGLStreamBuffer stream_buffer;
253 TextureCacheRuntime texture_cache_runtime; 228 TextureCacheRuntime texture_cache_runtime;
254 TextureCache texture_cache; 229 TextureCache texture_cache;
230 BufferCacheRuntime buffer_cache_runtime;
231 BufferCache buffer_cache;
255 ShaderCacheOpenGL shader_cache; 232 ShaderCacheOpenGL shader_cache;
256 QueryCache query_cache; 233 QueryCache query_cache;
257 OGLBufferCache buffer_cache;
258 FenceManagerOpenGL fence_manager; 234 FenceManagerOpenGL fence_manager;
259 235
260 VideoCommon::Shader::AsyncShaders async_shaders; 236 VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +238,8 @@ private:
262 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 238 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
263 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 239 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
264 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; 240 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
265 std::array<GLuint, MAX_TEXTURES> texture_handles; 241 std::array<GLuint, MAX_TEXTURES> texture_handles{};
266 std::array<GLuint, MAX_IMAGES> image_handles; 242 std::array<GLuint, MAX_IMAGES> image_handles{};
267
268 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
269 transform_feedback_buffers;
270 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
271 enabled_transform_feedback_buffers;
272
273 static constexpr std::size_t NUM_CONSTANT_BUFFERS =
274 Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
275 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
276 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
277 std::size_t current_cbuf = 0;
278 OGLBuffer unified_uniform_buffer;
279 243
280 /// Number of commands queued to the OpenGL driver. Resetted on flush. 244 /// Number of commands queued to the OpenGL driver. Resetted on flush.
281 std::size_t num_queued_commands = 0; 245 std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0e34a0f20..3428e5e21 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
171 handle = 0; 171 handle = 0;
172} 172}
173 173
174void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
175 ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
176
177 glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
178}
179
180void OGLSync::Create() { 174void OGLSync::Create() {
181 if (handle != 0) 175 if (handle != 0)
182 return; 176 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f48398669..552d79db4 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -234,9 +234,6 @@ public:
234 /// Deletes the internal OpenGL resource 234 /// Deletes the internal OpenGL resource
235 void Release(); 235 void Release();
236 236
237 // Converts the buffer into a stream copy buffer with a fixed size
238 void MakeStreamCopy(std::size_t buffer_size);
239
240 GLuint handle = 0; 237 GLuint handle = 0;
241}; 238};
242 239
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c35b71b6b..ac78d344c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); 64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); 65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66 66
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt 67constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint 68#define ftou floatBitsToUint
69#define itof intBitsToFloat 69#define itof intBitsToFloat
70#define utof uintBitsToFloat 70#define utof uintBitsToFloat
@@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
77 77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); 78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); 79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80
81layout (std140, binding = {}) uniform vs_config {{
82 float y_direction;
83}};
84)"; 80)";
85 81
86class ShaderWriter final { 82class ShaderWriter final {
@@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 398 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
403} 399}
404 400
405bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
406 const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
407 // We waste one UBO for emulation
408 const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
409 return num_ubos > num_available_ubos;
410}
411
412struct GenericVaryingDescription { 401struct GenericVaryingDescription {
413 std::string name; 402 std::string name;
414 u8 first_element = 0; 403 u8 first_element = 0;
@@ -420,9 +409,8 @@ public:
420 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, 409 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
421 ShaderType stage_, std::string_view identifier_, 410 ShaderType stage_, std::string_view identifier_,
422 std::string_view suffix_) 411 std::string_view suffix_)
423 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, 412 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
424 suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ 413 identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
425 UseUnifiedUniforms(device_, ir_, stage_)} {
426 if (stage != ShaderType::Compute) { 414 if (stage != ShaderType::Compute) {
427 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); 415 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
428 } 416 }
@@ -516,7 +504,8 @@ private:
516 if (!identifier.empty()) { 504 if (!identifier.empty()) {
517 code.AddLine("// {}", identifier); 505 code.AddLine("// {}", identifier);
518 } 506 }
519 code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); 507 const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
508 code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
520 code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); 509 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
521 if (device.HasShaderBallot()) { 510 if (device.HasShaderBallot()) {
522 code.AddLine("#extension GL_ARB_shader_ballot : require"); 511 code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -542,7 +531,7 @@ private:
542 531
543 code.AddNewLine(); 532 code.AddNewLine();
544 533
545 code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); 534 code.AddLine(COMMON_DECLARATIONS);
546 } 535 }
547 536
548 void DeclareVertex() { 537 void DeclareVertex() {
@@ -865,17 +854,6 @@ private:
865 } 854 }
866 855
867 void DeclareConstantBuffers() { 856 void DeclareConstantBuffers() {
868 if (use_unified_uniforms) {
869 const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
870 static_cast<u32>(ir.GetGlobalMemory().size());
871 code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
872 binding);
873 code.AddLine(" uint cbufs[];");
874 code.AddLine("}};");
875 code.AddNewLine();
876 return;
877 }
878
879 u32 binding = device.GetBaseBindings(stage).uniform_buffer; 857 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
880 for (const auto& [index, info] : ir.GetConstantBuffers()) { 858 for (const auto& [index, info] : ir.GetConstantBuffers()) {
881 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); 859 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
@@ -1081,29 +1059,17 @@ private:
1081 1059
1082 if (const auto cbuf = std::get_if<CbufNode>(&*node)) { 1060 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1083 const Node offset = cbuf->GetOffset(); 1061 const Node offset = cbuf->GetOffset();
1084 const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
1085 1062
1086 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1087 // Direct access 1064 // Direct access
1088 const u32 offset_imm = immediate->GetValue(); 1065 const u32 offset_imm = immediate->GetValue();
1089 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); 1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1090 if (use_unified_uniforms) { 1067 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1091 return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), 1068 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1092 Type::Uint};
1093 } else {
1094 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1095 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1096 Type::Uint};
1097 }
1098 }
1099
1100 // Indirect access
1101 if (use_unified_uniforms) {
1102 return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
1103 Visit(offset).AsUint()),
1104 Type::Uint}; 1069 Type::Uint};
1105 } 1070 }
1106 1071
1072 // Indirect access
1107 const std::string final_offset = code.GenerateTemporary(); 1073 const std::string final_offset = code.GenerateTemporary();
1108 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); 1074 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1109 1075
@@ -2293,7 +2259,6 @@ private:
2293 } 2259 }
2294 } 2260 }
2295 } 2261 }
2296
2297 if (header.ps.omap.depth) { 2262 if (header.ps.omap.depth) {
2298 // The depth output is always 2 registers after the last color output, and current_reg 2263 // The depth output is always 2 registers after the last color output, and current_reg
2299 // already contains one past the last color register. 2264 // already contains one past the last color register.
@@ -2337,7 +2302,8 @@ private:
2337 } 2302 }
2338 2303
2339 Expression YNegate(Operation operation) { 2304 Expression YNegate(Operation operation) {
2340 return {"y_direction", Type::Float}; 2305 // Y_NEGATE is mapped to this uniform value
2306 return {"gl_FrontMaterial.ambient.a", Type::Float};
2341 } 2307 }
2342 2308
2343 template <u32 element> 2309 template <u32 element>
@@ -2787,7 +2753,6 @@ private:
2787 const std::string_view identifier; 2753 const std::string_view identifier;
2788 const std::string_view suffix; 2754 const std::string_view suffix;
2789 const Header header; 2755 const Header header;
2790 const bool use_unified_uniforms;
2791 std::unordered_map<u8, VaryingTFB> transform_feedback; 2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2792 2757
2793 ShaderWriter code; 2758 ShaderWriter code;
@@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
3003 for (std::size_t i = 0; i < std::size(clip_distances); ++i) { 2968 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
3004 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; 2969 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
3005 } 2970 }
2971 for (const auto& buffer : entries.const_buffers) {
2972 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
2973 }
3006 entries.shader_length = ir.GetLength(); 2974 entries.shader_length = ir.GetLength();
3007 entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
3008 return entries; 2975 return entries;
3009} 2976}
3010 2977
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index be68994bb..0397a000c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -55,7 +55,7 @@ struct ShaderEntries {
55 std::vector<ImageEntry> images; 55 std::vector<ImageEntry> images;
56 std::size_t shader_length{}; 56 std::size_t shader_length{};
57 u32 clip_distances{}; 57 u32 clip_distances{};
58 bool use_unified_uniforms{}; 58 u32 enabled_uniform_buffers{};
59}; 59};
60 60
61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 60e6fa39f..dbdf5230f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
36 FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); 36 FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
37} 37}
38 38
39void SetupDirtyVertexArrays(Tables& tables) { 39void SetupDirtyVertexInstances(Tables& tables) {
40 static constexpr std::size_t num_array = 3;
41 static constexpr std::size_t instance_base_offset = 3; 40 static constexpr std::size_t instance_base_offset = 3;
42 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { 41 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
43 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); 42 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
44 const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
45
46 FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
47 FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
48
49 const std::size_t instance_array_offset = array_offset + instance_base_offset; 43 const std::size_t instance_array_offset = array_offset + instance_base_offset;
50 tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); 44 tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
51 tables[1][instance_array_offset] = VertexInstances; 45 tables[1][instance_array_offset] = VertexInstances;
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
217StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { 211StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
218 auto& dirty = gpu.Maxwell3D().dirty; 212 auto& dirty = gpu.Maxwell3D().dirty;
219 auto& tables = dirty.tables; 213 auto& tables = dirty.tables;
220 SetupDirtyRenderTargets(tables); 214 SetupDirtyFlags(tables);
221 SetupDirtyColorMasks(tables); 215 SetupDirtyColorMasks(tables);
222 SetupDirtyViewports(tables); 216 SetupDirtyViewports(tables);
223 SetupDirtyScissors(tables); 217 SetupDirtyScissors(tables);
224 SetupDirtyVertexArrays(tables); 218 SetupDirtyVertexInstances(tables);
225 SetupDirtyVertexFormat(tables); 219 SetupDirtyVertexFormat(tables);
226 SetupDirtyShaders(tables); 220 SetupDirtyShaders(tables);
227 SetupDirtyPolygonModes(tables); 221 SetupDirtyPolygonModes(tables);
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
241 SetupDirtyClipControl(tables); 235 SetupDirtyClipControl(tables);
242 SetupDirtyDepthClampEnabled(tables); 236 SetupDirtyDepthClampEnabled(tables);
243 SetupDirtyMisc(tables); 237 SetupDirtyMisc(tables);
244
245 auto& store = dirty.on_write_stores;
246 store[VertexBuffers] = true;
247 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
248 store[VertexBuffer0 + i] = true;
249 }
250}
251
252void StateTracker::InvalidateStreamBuffer() {
253 flags[Dirty::VertexBuffers] = true;
254 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
255 flags[index] = true;
256 }
257} 238}
258 239
259} // namespace OpenGL 240} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 574615d3c..94c905116 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -28,10 +28,6 @@ enum : u8 {
28 VertexFormat0, 28 VertexFormat0,
29 VertexFormat31 = VertexFormat0 + 31, 29 VertexFormat31 = VertexFormat0 + 31,
30 30
31 VertexBuffers,
32 VertexBuffer0,
33 VertexBuffer31 = VertexBuffer0 + 31,
34
35 VertexInstances, 31 VertexInstances,
36 VertexInstance0, 32 VertexInstance0,
37 VertexInstance31 = VertexInstance0 + 31, 33 VertexInstance31 = VertexInstance0 + 31,
@@ -92,8 +88,6 @@ class StateTracker {
92public: 88public:
93 explicit StateTracker(Tegra::GPU& gpu); 89 explicit StateTracker(Tegra::GPU& gpu);
94 90
95 void InvalidateStreamBuffer();
96
97 void BindIndexBuffer(GLuint new_index_buffer) { 91 void BindIndexBuffer(GLuint new_index_buffer) {
98 if (index_buffer == new_index_buffer) { 92 if (index_buffer == new_index_buffer) {
99 return; 93 return;
@@ -110,13 +104,32 @@ public:
110 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); 104 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
111 } 105 }
112 106
107 void ClipControl(GLenum new_origin, GLenum new_depth) {
108 if (new_origin == origin && new_depth == depth) {
109 return;
110 }
111 origin = new_origin;
112 depth = new_depth;
113 glClipControl(origin, depth);
114 }
115
116 void SetYNegate(bool new_y_negate) {
117 if (new_y_negate == y_negate) {
118 return;
119 }
120 // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
121 y_negate = new_y_negate;
122 const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
123 glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
124 }
125
113 void NotifyScreenDrawVertexArray() { 126 void NotifyScreenDrawVertexArray() {
114 flags[OpenGL::Dirty::VertexFormats] = true; 127 flags[OpenGL::Dirty::VertexFormats] = true;
115 flags[OpenGL::Dirty::VertexFormat0 + 0] = true; 128 flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
116 flags[OpenGL::Dirty::VertexFormat0 + 1] = true; 129 flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
117 130
118 flags[OpenGL::Dirty::VertexBuffers] = true; 131 flags[VideoCommon::Dirty::VertexBuffers] = true;
119 flags[OpenGL::Dirty::VertexBuffer0] = true; 132 flags[VideoCommon::Dirty::VertexBuffer0] = true;
120 133
121 flags[OpenGL::Dirty::VertexInstances] = true; 134 flags[OpenGL::Dirty::VertexInstances] = true;
122 flags[OpenGL::Dirty::VertexInstance0 + 0] = true; 135 flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -202,6 +215,9 @@ private:
202 215
203 GLuint framebuffer = 0; 216 GLuint framebuffer = 0;
204 GLuint index_buffer = 0; 217 GLuint index_buffer = 0;
218 GLenum origin = GL_LOWER_LEFT;
219 GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
220 bool y_negate = false;
205}; 221};
206 222
207} // namespace OpenGL 223} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e0819cdf2..77b3ee0fe 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,70 +1,64 @@
1// Copyright 2018 Citra Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <tuple> 5#include <array>
6#include <vector> 6#include <memory>
7#include <span>
8
9#include <glad/glad.h>
7 10
8#include "common/alignment.h" 11#include "common/alignment.h"
9#include "common/assert.h" 12#include "common/assert.h"
10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
13#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
14 14
15MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
16 MP_RGB(128, 128, 192));
17
18namespace OpenGL { 15namespace OpenGL {
19 16
20OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) 17StreamBuffer::StreamBuffer() {
21 : state_tracker{state_tracker_} { 18 static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
22 gl_buffer.Create(); 19 buffer.Create();
23 20 glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
24 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; 21 glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
25 glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); 22 mapped_pointer =
26 mapped_ptr = static_cast<u8*>( 23 static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
27 glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); 24 for (OGLSync& sync : fences) {
28 25 sync.Create();
29 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 } 26 }
33} 27}
34 28
35OGLStreamBuffer::~OGLStreamBuffer() { 29std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
36 glUnmapNamedBuffer(gl_buffer.handle); 30 ASSERT(size < REGION_SIZE);
37 gl_buffer.Release(); 31 for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
38} 32 ++region) {
39 33 fences[region].Create();
40std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
41 ASSERT(size <= BUFFER_SIZE);
42 ASSERT(alignment <= BUFFER_SIZE);
43 mapped_size = size;
44
45 if (alignment > 0) {
46 buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
47 } 34 }
35 used_iterator = iterator;
48 36
49 if (buffer_pos + size > BUFFER_SIZE) { 37 for (size_t region = Region(free_iterator) + 1,
50 MICROPROFILE_SCOPE(OpenGL_StreamBuffer); 38 region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
51 glInvalidateBufferData(gl_buffer.handle); 39 region < region_end; ++region) {
52 state_tracker.InvalidateStreamBuffer(); 40 glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
53 41 fences[region].Release();
54 buffer_pos = 0;
55 } 42 }
56 43 if (iterator + size >= free_iterator) {
57 return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); 44 free_iterator = iterator + size;
58}
59
60void OGLStreamBuffer::Unmap(GLsizeiptr size) {
61 ASSERT(size <= mapped_size);
62
63 if (size > 0) {
64 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
65 } 45 }
66 46 if (iterator + size > STREAM_BUFFER_SIZE) {
67 buffer_pos += size; 47 for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
48 fences[region].Create();
49 }
50 used_iterator = 0;
51 iterator = 0;
52 free_iterator = size;
53
54 for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
55 glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
56 fences[region].Release();
57 }
58 }
59 const size_t offset = iterator;
60 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
61 return {std::span(mapped_pointer + offset, size), offset};
68} 62}
69 63
70} // namespace OpenGL 64} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index dd9cf67eb..6dbb6bfba 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,9 +1,12 @@
1// Copyright 2018 Citra Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <memory>
9#include <span>
7#include <utility> 10#include <utility>
8 11
9#include <glad/glad.h> 12#include <glad/glad.h>
@@ -13,48 +16,35 @@
13 16
14namespace OpenGL { 17namespace OpenGL {
15 18
16class Device; 19class StreamBuffer {
17class StateTracker; 20 static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
21 static constexpr size_t NUM_SYNCS = 16;
22 static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
23 static constexpr size_t MAX_ALIGNMENT = 256;
24 static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
25 static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
26 static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
18 27
19class OGLStreamBuffer : private NonCopyable {
20public: 28public:
21 explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); 29 explicit StreamBuffer();
22 ~OGLStreamBuffer();
23
24 /*
25 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
26 * and the optional alignment requirement.
27 * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
28 * The return values are the pointer to the new chunk, and the offset within the buffer.
29 * The actual used size must be specified on unmapping the chunk.
30 */
31 std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
32
33 void Unmap(GLsizeiptr size);
34
35 GLuint Handle() const {
36 return gl_buffer.handle;
37 }
38 30
39 u64 Address() const { 31 [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
40 return gpu_address;
41 }
42 32
43 GLsizeiptr Size() const noexcept { 33 [[nodiscard]] GLuint Handle() const noexcept {
44 return BUFFER_SIZE; 34 return buffer.handle;
45 } 35 }
46 36
47private: 37private:
48 static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; 38 [[nodiscard]] static size_t Region(size_t offset) noexcept {
49 39 return offset / REGION_SIZE;
50 StateTracker& state_tracker; 40 }
51
52 OGLBuffer gl_buffer;
53 41
54 GLuint64EXT gpu_address = 0; 42 size_t iterator = 0;
55 GLintptr buffer_pos = 0; 43 size_t used_iterator = 0;
56 GLsizeiptr mapped_size = 0; 44 size_t free_iterator = 0;
57 u8* mapped_ptr = nullptr; 45 u8* mapped_pointer = nullptr;
46 OGLBuffer buffer;
47 std::array<OGLSync, NUM_SYNCS> fences;
58}; 48};
59 49
60} // namespace OpenGL 50} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 546cb6d00..12434db67 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
398 398
399} // Anonymous namespace 399} // Anonymous namespace
400 400
401ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
402 : span(map, size), sync{sync_}, handle{handle_} {}
403
404ImageBufferMap::~ImageBufferMap() { 401ImageBufferMap::~ImageBufferMap() {
405 if (sync) { 402 if (sync) {
406 sync->Create(); 403 sync->Create();
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
487 glFinish(); 484 glFinish();
488} 485}
489 486
490ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { 487ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
491 return upload_buffers.RequestMap(size, true); 488 return upload_buffers.RequestMap(size, true);
492} 489}
493 490
494ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { 491ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
495 return download_buffers.RequestMap(size, false); 492 return download_buffers.RequestMap(size, false);
496} 493}
497 494
@@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
553} 550}
554 551
555void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, 552void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
556 size_t buffer_offset,
557 std::span<const SwizzleParameters> swizzles) { 553 std::span<const SwizzleParameters> swizzles) {
558 switch (image.info.type) { 554 switch (image.info.type) {
559 case ImageType::e2D: 555 case ImageType::e2D:
560 return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); 556 return util_shaders.BlockLinearUpload2D(image, map, swizzles);
561 case ImageType::e3D: 557 case ImageType::e3D:
562 return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); 558 return util_shaders.BlockLinearUpload3D(image, map, swizzles);
563 case ImageType::Linear: 559 case ImageType::Linear:
564 return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); 560 return util_shaders.PitchUpload(image, map, swizzles);
565 default: 561 default:
566 UNREACHABLE(); 562 UNREACHABLE();
567 break; 563 break;
@@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
596 bool insert_fence) { 592 bool insert_fence) {
597 const size_t index = RequestBuffer(requested_size); 593 const size_t index = RequestBuffer(requested_size);
598 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; 594 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
599 return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); 595 return ImageBufferMap{
596 .mapped_span = std::span(maps[index], requested_size),
597 .sync = sync,
598 .buffer = buffers[index].handle,
599 };
600} 600}
601 601
602size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { 602size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
709 } 709 }
710} 710}
711 711
712void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 712void Image::UploadMemory(const ImageBufferMap& map,
713 std::span<const VideoCommon::BufferImageCopy> copies) { 713 std::span<const VideoCommon::BufferImageCopy> copies) {
714 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); 714 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
715 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); 715 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
716 716
717 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); 717 glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
718 718
@@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
728 current_image_height = copy.buffer_image_height; 728 current_image_height = copy.buffer_image_height;
729 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); 729 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
730 } 730 }
731 CopyBufferToImage(copy, buffer_offset); 731 CopyBufferToImage(copy, map.offset);
732 } 732 }
733} 733}
734 734
735void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 735void Image::UploadMemory(const ImageBufferMap& map,
736 std::span<const VideoCommon::BufferCopy> copies) { 736 std::span<const VideoCommon::BufferCopy> copies) {
737 for (const VideoCommon::BufferCopy& copy : copies) { 737 for (const VideoCommon::BufferCopy& copy : copies) {
738 glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, 738 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
739 copy.dst_offset, copy.size); 739 copy.dst_offset, copy.size);
740 } 740 }
741} 741}
742 742
743void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, 743void Image::DownloadMemory(ImageBufferMap& map,
744 std::span<const VideoCommon::BufferImageCopy> copies) { 744 std::span<const VideoCommon::BufferImageCopy> copies) {
745 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 745 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
746 746
747 glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); 747 glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
748 glPixelStorei(GL_PACK_ALIGNMENT, 1); 748 glPixelStorei(GL_PACK_ALIGNMENT, 1);
749 749
750 u32 current_row_length = std::numeric_limits<u32>::max(); 750 u32 current_row_length = std::numeric_limits<u32>::max();
@@ -759,7 +759,38 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
759 current_image_height = copy.buffer_image_height; 759 current_image_height = copy.buffer_image_height;
760 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); 760 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
761 } 761 }
762 CopyImageToBuffer(copy, buffer_offset); 762 CopyImageToBuffer(copy, map.offset);
763 }
764}
765
766GLuint Image::StorageHandle() noexcept {
767 switch (info.format) {
768 case PixelFormat::A8B8G8R8_SRGB:
769 case PixelFormat::B8G8R8A8_SRGB:
770 case PixelFormat::BC1_RGBA_SRGB:
771 case PixelFormat::BC2_SRGB:
772 case PixelFormat::BC3_SRGB:
773 case PixelFormat::BC7_SRGB:
774 case PixelFormat::ASTC_2D_4X4_SRGB:
775 case PixelFormat::ASTC_2D_8X8_SRGB:
776 case PixelFormat::ASTC_2D_8X5_SRGB:
777 case PixelFormat::ASTC_2D_5X4_SRGB:
778 case PixelFormat::ASTC_2D_5X5_SRGB:
779 case PixelFormat::ASTC_2D_10X8_SRGB:
780 case PixelFormat::ASTC_2D_6X6_SRGB:
781 case PixelFormat::ASTC_2D_10X10_SRGB:
782 case PixelFormat::ASTC_2D_12X12_SRGB:
783 case PixelFormat::ASTC_2D_8X6_SRGB:
784 case PixelFormat::ASTC_2D_6X5_SRGB:
785 if (store_view.handle != 0) {
786 return store_view.handle;
787 }
788 store_view.Create();
789 glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
790 info.resources.levels, 0, info.resources.layers);
791 return store_view.handle;
792 default:
793 return texture.handle;
763 } 794 }
764} 795}
765 796
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 15b7c3676..a6172f009 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -31,23 +31,13 @@ using VideoCommon::NUM_RT;
31using VideoCommon::Offset2D; 31using VideoCommon::Offset2D;
32using VideoCommon::RenderTargets; 32using VideoCommon::RenderTargets;
33 33
34class ImageBufferMap { 34struct ImageBufferMap {
35public:
36 explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
37 ~ImageBufferMap(); 35 ~ImageBufferMap();
38 36
39 GLuint Handle() const noexcept { 37 std::span<u8> mapped_span;
40 return handle; 38 size_t offset = 0;
41 }
42
43 std::span<u8> Span() const noexcept {
44 return span;
45 }
46
47private:
48 std::span<u8> span;
49 OGLSync* sync; 39 OGLSync* sync;
50 GLuint handle; 40 GLuint buffer;
51}; 41};
52 42
53struct FormatProperties { 43struct FormatProperties {
@@ -69,9 +59,9 @@ public:
69 59
70 void Finish(); 60 void Finish();
71 61
72 ImageBufferMap MapUploadBuffer(size_t size); 62 ImageBufferMap UploadStagingBuffer(size_t size);
73 63
74 ImageBufferMap MapDownloadBuffer(size_t size); 64 ImageBufferMap DownloadStagingBuffer(size_t size);
75 65
76 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 66 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
77 67
@@ -89,7 +79,7 @@ public:
89 Tegra::Engines::Fermi2D::Filter filter, 79 Tegra::Engines::Fermi2D::Filter filter,
90 Tegra::Engines::Fermi2D::Operation operation); 80 Tegra::Engines::Fermi2D::Operation operation);
91 81
92 void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 82 void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
93 std::span<const VideoCommon::SwizzleParameters> swizzles); 83 std::span<const VideoCommon::SwizzleParameters> swizzles);
94 84
95 void InsertUploadMemoryBarrier(); 85 void InsertUploadMemoryBarrier();
@@ -148,14 +138,14 @@ public:
148 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 138 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
149 VAddr cpu_addr); 139 VAddr cpu_addr);
150 140
151 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 141 void UploadMemory(const ImageBufferMap& map,
152 std::span<const VideoCommon::BufferImageCopy> copies); 142 std::span<const VideoCommon::BufferImageCopy> copies);
153 143
154 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 144 void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
155 std::span<const VideoCommon::BufferCopy> copies); 145
146 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
156 147
157 void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, 148 GLuint StorageHandle() noexcept;
158 std::span<const VideoCommon::BufferImageCopy> copies);
159 149
160 GLuint Handle() const noexcept { 150 GLuint Handle() const noexcept {
161 return texture.handle; 151 return texture.handle;
@@ -167,8 +157,8 @@ private:
167 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); 157 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
168 158
169 OGLTexture texture; 159 OGLTexture texture;
170 OGLTextureView store_view;
171 OGLBuffer buffer; 160 OGLBuffer buffer;
161 OGLTextureView store_view;
172 GLenum gl_internal_format = GL_NONE; 162 GLenum gl_internal_format = GL_NONE;
173 GLenum gl_format = GL_NONE; 163 GLenum gl_format = GL_NONE;
174 GLenum gl_type = GL_NONE; 164 GLenum gl_type = GL_NONE;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index cbccfdeb4..f7ad8f370 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -4,23 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <glad/glad.h> 7#include <glad/glad.h>
9#include "common/common_types.h"
10#include "common/logging/log.h"
11#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
12 9
13namespace OpenGL { 10namespace OpenGL::MaxwellToGL {
14
15using GLvec2 = std::array<GLfloat, 2>;
16using GLvec3 = std::array<GLfloat, 3>;
17using GLvec4 = std::array<GLfloat, 4>;
18
19using GLuvec2 = std::array<GLuint, 2>;
20using GLuvec3 = std::array<GLuint, 3>;
21using GLuvec4 = std::array<GLuint, 4>;
22
23namespace MaxwellToGL {
24 11
25using Maxwell = Tegra::Engines::Maxwell3D::Regs; 12using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26 13
@@ -317,26 +304,6 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
317 return GL_ZERO; 304 return GL_ZERO;
318} 305}
319 306
320inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
321 switch (source) {
322 case Tegra::Texture::SwizzleSource::Zero:
323 return GL_ZERO;
324 case Tegra::Texture::SwizzleSource::R:
325 return GL_RED;
326 case Tegra::Texture::SwizzleSource::G:
327 return GL_GREEN;
328 case Tegra::Texture::SwizzleSource::B:
329 return GL_BLUE;
330 case Tegra::Texture::SwizzleSource::A:
331 return GL_ALPHA;
332 case Tegra::Texture::SwizzleSource::OneInt:
333 case Tegra::Texture::SwizzleSource::OneFloat:
334 return GL_ONE;
335 }
336 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
337 return GL_ZERO;
338}
339
340inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { 307inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
341 switch (comparison) { 308 switch (comparison) {
342 case Maxwell::ComparisonOp::Never: 309 case Maxwell::ComparisonOp::Never:
@@ -493,5 +460,4 @@ inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
493 return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); 460 return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
494} 461}
495 462
496} // namespace MaxwellToGL 463} // namespace OpenGL::MaxwellToGL
497} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 21159e498..9d2acd4d9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -29,9 +29,7 @@
29#include "video_core/textures/decoders.h" 29#include "video_core/textures/decoders.h"
30 30
31namespace OpenGL { 31namespace OpenGL {
32
33namespace { 32namespace {
34
35constexpr GLint PositionLocation = 0; 33constexpr GLint PositionLocation = 0;
36constexpr GLint TexCoordLocation = 1; 34constexpr GLint TexCoordLocation = 1;
37constexpr GLint ModelViewMatrixLocation = 0; 35constexpr GLint ModelViewMatrixLocation = 0;
@@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
124 break; 122 break;
125 } 123 }
126} 124}
127
128} // Anonymous namespace 125} // Anonymous namespace
129 126
130RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, 127RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
@@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
132 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 129 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
133 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 130 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
134 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, 131 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
135 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} 132 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
133 program_manager{device},
134 rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
135 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
136 glEnable(GL_DEBUG_OUTPUT);
137 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
138 glDebugMessageCallback(DebugHandler, nullptr);
139 }
140 AddTelemetryFields();
141 InitOpenGLObjects();
142}
136 143
137RendererOpenGL::~RendererOpenGL() = default; 144RendererOpenGL::~RendererOpenGL() = default;
138 145
@@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
148 155
149 ++m_current_frame; 156 ++m_current_frame;
150 157
151 rasterizer->TickFrame(); 158 rasterizer.TickFrame();
152 159
153 context->SwapBuffers(); 160 context->SwapBuffers();
154 render_window.OnFrameDisplayed(); 161 render_window.OnFrameDisplayed();
@@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
179 framebuffer_crop_rect = framebuffer.crop_rect; 186 framebuffer_crop_rect = framebuffer.crop_rect;
180 187
181 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; 188 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
182 if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { 189 if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
183 return; 190 return;
184 } 191 }
185 192
@@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() {
267 // Enable unified vertex attributes and query vertex buffer address when the driver supports it 274 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
268 if (device.HasVertexBufferUnifiedMemory()) { 275 if (device.HasVertexBufferUnifiedMemory()) {
269 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); 276 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
277 glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
270 278
271 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); 279 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
272 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, 280 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
@@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() {
289 telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); 297 telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
290} 298}
291 299
292void RendererOpenGL::CreateRasterizer() {
293 if (rasterizer) {
294 return;
295 }
296 rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
297 screen_info, program_manager, state_tracker);
298}
299
300void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 300void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
301 const Tegra::FramebufferConfig& framebuffer) { 301 const Tegra::FramebufferConfig& framebuffer) {
302 texture.width = framebuffer.width; 302 texture.width = framebuffer.width;
@@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
407 407
408 program_manager.BindHostPipeline(pipeline.handle); 408 program_manager.BindHostPipeline(pipeline.handle);
409 409
410 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
410 glEnable(GL_CULL_FACE); 411 glEnable(GL_CULL_FACE);
411 if (screen_info.display_srgb) { 412 if (screen_info.display_srgb) {
412 glEnable(GL_FRAMEBUFFER_SRGB); 413 glEnable(GL_FRAMEBUFFER_SRGB);
@@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
425 glCullFace(GL_BACK); 426 glCullFace(GL_BACK);
426 glFrontFace(GL_CW); 427 glFrontFace(GL_CW);
427 glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); 428 glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
428 glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
429 glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), 429 glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
430 static_cast<GLfloat>(layout.height)); 430 static_cast<GLfloat>(layout.height));
431 glDepthRangeIndexed(0, 0.0, 0.0); 431 glDepthRangeIndexed(0, 0.0, 0.0);
@@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
497 renderer_settings.screenshot_requested = false; 497 renderer_settings.screenshot_requested = false;
498} 498}
499 499
500bool RendererOpenGL::Init() {
501 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
502 glEnable(GL_DEBUG_OUTPUT);
503 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
504 glDebugMessageCallback(DebugHandler, nullptr);
505 }
506
507 AddTelemetryFields();
508
509 if (!GLAD_GL_VERSION_4_6) {
510 return false;
511 }
512
513 InitOpenGLObjects();
514 CreateRasterizer();
515
516 return true;
517}
518
519void RendererOpenGL::ShutDown() {}
520
521} // namespace OpenGL 500} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 44e109794..cc19a110f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
12#include "video_core/renderer_opengl/gl_device.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_shader_manager.h" 15#include "video_core/renderer_opengl/gl_shader_manager.h"
15#include "video_core/renderer_opengl/gl_state_tracker.h" 16#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -63,18 +64,18 @@ public:
63 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 64 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
64 ~RendererOpenGL() override; 65 ~RendererOpenGL() override;
65 66
66 bool Init() override;
67 void ShutDown() override;
68 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 67 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
69 68
69 VideoCore::RasterizerInterface* ReadRasterizer() override {
70 return &rasterizer;
71 }
72
70private: 73private:
71 /// Initializes the OpenGL state and creates persistent objects. 74 /// Initializes the OpenGL state and creates persistent objects.
72 void InitOpenGLObjects(); 75 void InitOpenGLObjects();
73 76
74 void AddTelemetryFields(); 77 void AddTelemetryFields();
75 78
76 void CreateRasterizer();
77
78 void ConfigureFramebufferTexture(TextureInfo& texture, 79 void ConfigureFramebufferTexture(TextureInfo& texture,
79 const Tegra::FramebufferConfig& framebuffer); 80 const Tegra::FramebufferConfig& framebuffer);
80 81
@@ -98,8 +99,10 @@ private:
98 Core::Memory::Memory& cpu_memory; 99 Core::Memory::Memory& cpu_memory;
99 Tegra::GPU& gpu; 100 Tegra::GPU& gpu;
100 101
101 const Device device; 102 Device device;
102 StateTracker state_tracker{gpu}; 103 StateTracker state_tracker;
104 ProgramManager program_manager;
105 RasterizerOpenGL rasterizer;
103 106
104 // OpenGL object IDs 107 // OpenGL object IDs
105 OGLSampler present_sampler; 108 OGLSampler present_sampler;
@@ -115,9 +118,6 @@ private:
115 /// Display information for Switch screen 118 /// Display information for Switch screen
116 ScreenInfo screen_info; 119 ScreenInfo screen_info;
117 120
118 /// Global dummy shader pipeline
119 ProgramManager program_manager;
120
121 /// OpenGL framebuffer data 121 /// OpenGL framebuffer data
122 std::vector<u8> gl_framebuffer_data; 122 std::vector<u8> gl_framebuffer_data;
123 123
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index eb849cbf2..31ec68505 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
63 63
64UtilShaders::~UtilShaders() = default; 64UtilShaders::~UtilShaders() = default;
65 65
66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
67 std::span<const SwizzleParameters> swizzles) { 67 std::span<const SwizzleParameters> swizzles) {
68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; 69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
72 72
73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); 73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
74 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); 74 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
76 76
77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
78 for (const SwizzleParameters& swizzle : swizzles) { 78 for (const SwizzleParameters& swizzle : swizzles) {
79 const Extent3D num_tiles = swizzle.num_tiles; 79 const Extent3D num_tiles = swizzle.num_tiles;
80 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 80 const size_t input_offset = swizzle.buffer_offset + map.offset;
81 81
82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -91,16 +91,16 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
91 glUniform1ui(5, params.x_shift); 91 glUniform1ui(5, params.x_shift);
92 glUniform1ui(6, params.block_height); 92 glUniform1ui(6, params.block_height);
93 glUniform1ui(7, params.block_height_mask); 93 glUniform1ui(7, params.block_height_mask);
94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), 94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
95 input_offset, image.guest_size_bytes - swizzle.buffer_offset); 95 image.guest_size_bytes - swizzle.buffer_offset);
96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, 96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
97 GL_WRITE_ONLY, store_format); 97 GL_WRITE_ONLY, store_format);
98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); 98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
99 } 99 }
100 program_manager.RestoreGuestCompute(); 100 program_manager.RestoreGuestCompute();
101} 101}
102 102
103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
104 std::span<const SwizzleParameters> swizzles) { 104 std::span<const SwizzleParameters> swizzles) {
105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; 105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
106 106
@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
108 static constexpr GLuint BINDING_INPUT_BUFFER = 1; 108 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
110 110
111 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); 111 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); 112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
114 114
115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
116 for (const SwizzleParameters& swizzle : swizzles) { 116 for (const SwizzleParameters& swizzle : swizzles) {
117 const Extent3D num_tiles = swizzle.num_tiles; 117 const Extent3D num_tiles = swizzle.num_tiles;
118 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 118 const size_t input_offset = swizzle.buffer_offset + map.offset;
119 119
120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -132,16 +132,16 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
132 glUniform1ui(7, params.block_height_mask); 132 glUniform1ui(7, params.block_height_mask);
133 glUniform1ui(8, params.block_depth); 133 glUniform1ui(8, params.block_depth);
134 glUniform1ui(9, params.block_depth_mask); 134 glUniform1ui(9, params.block_depth_mask);
135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), 135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
136 input_offset, image.guest_size_bytes - swizzle.buffer_offset); 136 image.guest_size_bytes - swizzle.buffer_offset);
137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, 137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
138 GL_WRITE_ONLY, store_format); 138 GL_WRITE_ONLY, store_format);
139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); 139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
140 } 140 }
141 program_manager.RestoreGuestCompute(); 141 program_manager.RestoreGuestCompute();
142} 142}
143 143
144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
145 std::span<const SwizzleParameters> swizzles) { 145 std::span<const SwizzleParameters> swizzles) {
146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
147 static constexpr GLuint BINDING_INPUT_BUFFER = 0; 147 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,21 +159,22 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
159 "Non-power of two images are not implemented"); 159 "Non-power of two images are not implemented");
160 160
161 program_manager.BindHostCompute(pitch_unswizzle_program.handle); 161 program_manager.BindHostCompute(pitch_unswizzle_program.handle);
162 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); 162 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
163 glUniform2ui(LOC_ORIGIN, 0, 0); 163 glUniform2ui(LOC_ORIGIN, 0, 0);
164 glUniform2i(LOC_DESTINATION, 0, 0); 164 glUniform2i(LOC_DESTINATION, 0, 0);
165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); 165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
166 glUniform1ui(LOC_PITCH, pitch); 166 glUniform1ui(LOC_PITCH, pitch);
167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); 167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
168 format);
168 for (const SwizzleParameters& swizzle : swizzles) { 169 for (const SwizzleParameters& swizzle : swizzles) {
169 const Extent3D num_tiles = swizzle.num_tiles; 170 const Extent3D num_tiles = swizzle.num_tiles;
170 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 171 const size_t input_offset = swizzle.buffer_offset + map.offset;
171 172
172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 173 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 174 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
174 175
175 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), 176 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
176 input_offset, image.guest_size_bytes - swizzle.buffer_offset); 177 image.guest_size_bytes - swizzle.buffer_offset);
177 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); 178 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
178 } 179 }
179 program_manager.RestoreGuestCompute(); 180 program_manager.RestoreGuestCompute();
@@ -195,9 +196,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
195 196
196 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); 197 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
197 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); 198 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
198 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, 199 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
199 GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); 200 copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
200 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), 201 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
201 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); 202 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
202 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); 203 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
203 } 204 }
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 359997255..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -15,21 +15,22 @@
15namespace OpenGL { 15namespace OpenGL {
16 16
17class Image; 17class Image;
18class ImageBufferMap;
19class ProgramManager; 18class ProgramManager;
20 19
20struct ImageBufferMap;
21
21class UtilShaders { 22class UtilShaders {
22public: 23public:
23 explicit UtilShaders(ProgramManager& program_manager); 24 explicit UtilShaders(ProgramManager& program_manager);
24 ~UtilShaders(); 25 ~UtilShaders();
25 26
26 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 27 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
27 std::span<const VideoCommon::SwizzleParameters> swizzles); 28 std::span<const VideoCommon::SwizzleParameters> swizzles);
28 29
29 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 30 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
30 std::span<const VideoCommon::SwizzleParameters> swizzles); 31 std::span<const VideoCommon::SwizzleParameters> swizzles);
31 32
32 void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 33 void PitchUpload(Image& image, const ImageBufferMap& map,
33 std::span<const VideoCommon::SwizzleParameters> swizzles); 34 std::span<const VideoCommon::SwizzleParameters> swizzles);
34 35
35 void CopyBC4(Image& dst_image, Image& src_image, 36 void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5be6dabd9..362278f01 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -12,14 +12,15 @@
12#include "common/cityhash.h" 12#include "common/cityhash.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 14#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
15#include "video_core/renderer_vulkan/vk_state_tracker.h"
15 16
16namespace Vulkan { 17namespace Vulkan {
17 18
18namespace { 19namespace {
19 20
20constexpr std::size_t POINT = 0; 21constexpr size_t POINT = 0;
21constexpr std::size_t LINE = 1; 22constexpr size_t LINE = 1;
22constexpr std::size_t POLYGON = 2; 23constexpr size_t POLYGON = 2;
23constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { 24constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
24 POINT, // Points 25 POINT, // Points
25 LINE, // Lines 26 LINE, // Lines
@@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
40 41
41} // Anonymous namespace 42} // Anonymous namespace
42 43
43void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { 44void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
44 const std::array enabled_lut = {regs.polygon_offset_point_enable, 45 bool has_extended_dynamic_state) {
45 regs.polygon_offset_line_enable, 46 const Maxwell& regs = maxwell3d.regs;
46 regs.polygon_offset_fill_enable}; 47 const std::array enabled_lut{
48 regs.polygon_offset_point_enable,
49 regs.polygon_offset_line_enable,
50 regs.polygon_offset_fill_enable,
51 };
47 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 52 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
48 53
49 raw1 = 0; 54 raw1 = 0;
@@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
64 69
65 raw2 = 0; 70 raw2 = 0;
66 const auto test_func = 71 const auto test_func =
67 regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; 72 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
68 alpha_test_func.Assign(PackComparisonOp(test_func)); 73 alpha_test_func.Assign(PackComparisonOp(test_func));
69 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); 74 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
70 75
71 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); 76 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
72 point_size = Common::BitCast<u32>(regs.point_size); 77 point_size = Common::BitCast<u32>(regs.point_size);
73 78
74 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 79 if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
75 binding_divisors[index] = 80 maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
76 regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; 81 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
82 const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
83 binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
84 }
77 } 85 }
78 86 if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
79 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 87 maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
80 const auto& input = regs.vertex_attrib_format[index]; 88 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
81 auto& attribute = attributes[index]; 89 const auto& input = regs.vertex_attrib_format[index];
82 attribute.raw = 0; 90 auto& attribute = attributes[index];
83 attribute.enabled.Assign(input.IsConstant() ? 0 : 1); 91 attribute.raw = 0;
84 attribute.buffer.Assign(input.buffer); 92 attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
85 attribute.offset.Assign(input.offset); 93 attribute.buffer.Assign(input.buffer);
86 attribute.type.Assign(static_cast<u32>(input.type.Value())); 94 attribute.offset.Assign(input.offset);
87 attribute.size.Assign(static_cast<u32>(input.size.Value())); 95 attribute.type.Assign(static_cast<u32>(input.type.Value()));
88 attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); 96 attribute.size.Assign(static_cast<u32>(input.size.Value()));
97 }
89 } 98 }
90 99 if (maxwell3d.dirty.flags[Dirty::Blending]) {
91 for (std::size_t index = 0; index < std::size(attachments); ++index) { 100 maxwell3d.dirty.flags[Dirty::Blending] = false;
92 attachments[index].Fill(regs, index); 101 for (size_t index = 0; index < attachments.size(); ++index) {
102 attachments[index].Refresh(regs, index);
103 }
104 }
105 if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
106 maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
107 const auto& transform = regs.viewport_transform;
108 std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
109 return static_cast<u16>(viewport.swizzle.raw);
110 });
93 } 111 }
94
95 const auto& transform = regs.viewport_transform;
96 std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
97 [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
98
99 if (!has_extended_dynamic_state) { 112 if (!has_extended_dynamic_state) {
100 no_extended_dynamic_state.Assign(1); 113 no_extended_dynamic_state.Assign(1);
101 dynamic_state.Fill(regs); 114 dynamic_state.Refresh(regs);
102 } 115 }
103} 116}
104 117
105void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { 118void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
106 const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; 119 const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
107 120
108 raw = 0; 121 raw = 0;
@@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
141 enable.Assign(1); 154 enable.Assign(1);
142} 155}
143 156
144void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { 157void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
145 u32 packed_front_face = PackFrontFace(regs.front_face); 158 u32 packed_front_face = PackFrontFace(regs.front_face);
146 if (regs.screen_y_control.triangle_rast_flip != 0) { 159 if (regs.screen_y_control.triangle_rast_flip != 0) {
147 // Flip front face 160 // Flip front face
@@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
178 }); 191 });
179} 192}
180 193
181std::size_t FixedPipelineState::Hash() const noexcept { 194size_t FixedPipelineState::Hash() const noexcept {
182 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); 195 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
183 return static_cast<std::size_t>(hash); 196 return static_cast<size_t>(hash);
184} 197}
185 198
186bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { 199bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 465a55fdb..a0eb83a68 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -58,7 +58,7 @@ struct FixedPipelineState {
58 BitField<30, 1, u32> enable; 58 BitField<30, 1, u32> enable;
59 }; 59 };
60 60
61 void Fill(const Maxwell& regs, std::size_t index); 61 void Refresh(const Maxwell& regs, size_t index);
62 62
63 constexpr std::array<bool, 4> Mask() const noexcept { 63 constexpr std::array<bool, 4> Mask() const noexcept {
64 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; 64 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
@@ -96,8 +96,6 @@ struct FixedPipelineState {
96 BitField<6, 14, u32> offset; 96 BitField<6, 14, u32> offset;
97 BitField<20, 3, u32> type; 97 BitField<20, 3, u32> type;
98 BitField<23, 6, u32> size; 98 BitField<23, 6, u32> size;
99 // Not really an element of a vertex attribute, but it can be packed here
100 BitField<29, 1, u32> binding_index_enabled;
101 99
102 constexpr Maxwell::VertexAttribute::Type Type() const noexcept { 100 constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
103 return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); 101 return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
@@ -108,7 +106,7 @@ struct FixedPipelineState {
108 } 106 }
109 }; 107 };
110 108
111 template <std::size_t Position> 109 template <size_t Position>
112 union StencilFace { 110 union StencilFace {
113 BitField<Position + 0, 3, u32> action_stencil_fail; 111 BitField<Position + 0, 3, u32> action_stencil_fail;
114 BitField<Position + 3, 3, u32> action_depth_fail; 112 BitField<Position + 3, 3, u32> action_depth_fail;
@@ -152,7 +150,7 @@ struct FixedPipelineState {
152 // Vertex stride is a 12 bits value, we have 4 bits to spare per element 150 // Vertex stride is a 12 bits value, we have 4 bits to spare per element
153 std::array<u16, Maxwell::NumVertexArrays> vertex_strides; 151 std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
154 152
155 void Fill(const Maxwell& regs); 153 void Refresh(const Maxwell& regs);
156 154
157 Maxwell::ComparisonOp DepthTestFunc() const noexcept { 155 Maxwell::ComparisonOp DepthTestFunc() const noexcept {
158 return UnpackComparisonOp(depth_test_func); 156 return UnpackComparisonOp(depth_test_func);
@@ -199,9 +197,9 @@ struct FixedPipelineState {
199 std::array<u16, Maxwell::NumViewports> viewport_swizzles; 197 std::array<u16, Maxwell::NumViewports> viewport_swizzles;
200 DynamicState dynamic_state; 198 DynamicState dynamic_state;
201 199
202 void Fill(const Maxwell& regs, bool has_extended_dynamic_state); 200 void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
203 201
204 std::size_t Hash() const noexcept; 202 size_t Hash() const noexcept;
205 203
206 bool operator==(const FixedPipelineState& rhs) const noexcept; 204 bool operator==(const FixedPipelineState& rhs) const noexcept;
207 205
@@ -209,8 +207,8 @@ struct FixedPipelineState {
209 return !operator==(rhs); 207 return !operator==(rhs);
210 } 208 }
211 209
212 std::size_t Size() const noexcept { 210 size_t Size() const noexcept {
213 const std::size_t total_size = sizeof *this; 211 const size_t total_size = sizeof *this;
214 return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); 212 return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
215 } 213 }
216}; 214};
@@ -224,7 +222,7 @@ namespace std {
224 222
225template <> 223template <>
226struct hash<Vulkan::FixedPipelineState> { 224struct hash<Vulkan::FixedPipelineState> {
227 std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { 225 size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
228 return k.Hash(); 226 return k.Hash();
229 } 227 }
230}; 228};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
531 return {}; 531 return {};
532} 532}
533 533
534VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { 534VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
535 switch (index_format) { 535 switch (index_format) {
536 case Maxwell::IndexFormat::UnsignedByte: 536 case Maxwell::IndexFormat::UnsignedByte:
537 if (!device.IsExtIndexTypeUint8Supported()) {
538 UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
539 return VK_INDEX_TYPE_UINT16;
540 }
541 return VK_INDEX_TYPE_UINT8_EXT; 537 return VK_INDEX_TYPE_UINT8_EXT;
542 case Maxwell::IndexFormat::UnsignedShort: 538 case Maxwell::IndexFormat::UnsignedShort:
543 return VK_INDEX_TYPE_UINT16; 539 return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
53 53
54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); 54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
55 55
56VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); 56VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
57 57
58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); 58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
59 59
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 61796e33a..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
80 return separated_extensions; 80 return separated_extensions;
81} 81}
82 82
83Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
84 VkSurfaceKHR surface) {
85 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
86 const s32 device_index = Settings::values.vulkan_device.GetValue();
87 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
88 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
89 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
90 }
91 const vk::PhysicalDevice physical_device(devices[device_index], dld);
92 return Device(*instance, physical_device, surface, dld);
93}
83} // Anonymous namespace 94} // Anonymous namespace
84 95
85RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 96RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
86 Core::Frontend::EmuWindow& emu_window, 97 Core::Frontend::EmuWindow& emu_window,
87 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 98 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
88 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 99 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
89 : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, 100 : RendererBase(emu_window, std::move(context_)),
90 cpu_memory{cpu_memory_}, gpu{gpu_} {} 101 telemetry_session(telemetry_session_),
102 cpu_memory(cpu_memory_),
103 gpu(gpu_),
104 library(OpenLibrary()),
105 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
106 true, Settings::values.renderer_debug)),
107 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
108 surface(CreateSurface(instance, render_window)),
109 device(CreateDevice(instance, dld, *surface)),
110 memory_allocator(device, false),
111 state_tracker(gpu),
112 scheduler(device, state_tracker),
113 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
114 render_window.GetFramebufferLayout().height, false),
115 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
116 screen_info),
117 rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
118 memory_allocator, state_tracker, scheduler) {
119 Report();
120} catch (const vk::Exception& exception) {
121 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
122 throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
123}
91 124
92RendererVulkan::~RendererVulkan() { 125RendererVulkan::~RendererVulkan() {
93 ShutDown(); 126 void(device.GetLogical().WaitIdle());
94} 127}
95 128
96void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 129void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
101 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 134 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
102 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 135 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
103 const bool use_accelerated = 136 const bool use_accelerated =
104 rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 137 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
105 const bool is_srgb = use_accelerated && screen_info.is_srgb; 138 const bool is_srgb = use_accelerated && screen_info.is_srgb;
106 if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { 139 if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
107 swapchain->Create(layout.width, layout.height, is_srgb); 140 swapchain.Create(layout.width, layout.height, is_srgb);
108 blit_screen->Recreate(); 141 blit_screen.Recreate();
109 } 142 }
110 143
111 scheduler->WaitWorker(); 144 scheduler.WaitWorker();
112 145
113 swapchain->AcquireNextImage(); 146 swapchain.AcquireNextImage();
114 const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); 147 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
115 148
116 scheduler->Flush(render_semaphore); 149 scheduler.Flush(render_semaphore);
117 150
118 if (swapchain->Present(render_semaphore)) { 151 if (swapchain.Present(render_semaphore)) {
119 blit_screen->Recreate(); 152 blit_screen.Recreate();
120 } 153 }
121 154 rasterizer.TickFrame();
122 rasterizer->TickFrame();
123 } 155 }
124 156
125 render_window.OnFrameDisplayed(); 157 render_window.OnFrameDisplayed();
126} 158}
127 159
128bool RendererVulkan::Init() try {
129 library = OpenLibrary();
130 instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
131 true, Settings::values.renderer_debug);
132 if (Settings::values.renderer_debug) {
133 debug_callback = CreateDebugCallback(instance);
134 }
135 surface = CreateSurface(instance, render_window);
136
137 InitializeDevice();
138 Report();
139
140 memory_allocator = std::make_unique<MemoryAllocator>(*device);
141
142 state_tracker = std::make_unique<StateTracker>(gpu);
143
144 scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
145
146 const auto& framebuffer = render_window.GetFramebufferLayout();
147 swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
148 swapchain->Create(framebuffer.width, framebuffer.height, false);
149
150 rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
151 cpu_memory, screen_info, *device,
152 *memory_allocator, *state_tracker, *scheduler);
153
154 blit_screen =
155 std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
156 *memory_allocator, *swapchain, *scheduler, screen_info);
157 return true;
158
159} catch (const vk::Exception& exception) {
160 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
161 return false;
162}
163
164void RendererVulkan::ShutDown() {
165 if (!device) {
166 return;
167 }
168 if (const auto& dev = device->GetLogical()) {
169 dev.WaitIdle();
170 }
171 rasterizer.reset();
172 blit_screen.reset();
173 scheduler.reset();
174 swapchain.reset();
175 memory_allocator.reset();
176 device.reset();
177}
178
179void RendererVulkan::InitializeDevice() {
180 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
181 const s32 device_index = Settings::values.vulkan_device.GetValue();
182 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
183 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
184 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
185 }
186 const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
187 device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
188}
189
190void RendererVulkan::Report() const { 160void RendererVulkan::Report() const {
191 const std::string vendor_name{device->GetVendorName()}; 161 const std::string vendor_name{device.GetVendorName()};
192 const std::string model_name{device->GetModelName()}; 162 const std::string model_name{device.GetModelName()};
193 const std::string driver_version = GetDriverVersion(*device); 163 const std::string driver_version = GetDriverVersion(device);
194 const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); 164 const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
195 165
196 const std::string api_version = GetReadableVersion(device->ApiVersion()); 166 const std::string api_version = GetReadableVersion(device.ApiVersion());
197 167
198 const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); 168 const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
199 169
200 LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); 170 LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
201 LOG_INFO(Render_Vulkan, "Device: {}", model_name); 171 LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
209 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 179 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
210} 180}
211 181
212std::vector<std::string> RendererVulkan::EnumerateDevices() try {
213 vk::InstanceDispatch dld;
214 const Common::DynamicLibrary library = OpenLibrary();
215 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
216 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
217 std::vector<std::string> names;
218 names.reserve(physical_devices.size());
219 for (const VkPhysicalDevice device : physical_devices) {
220 names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
221 }
222 return names;
223
224} catch (const vk::Exception& exception) {
225 LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
226 return {};
227}
228
229} // namespace Vulkan 182} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index daf55b9b4..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,8 +9,14 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/dynamic_library.h" 11#include "common/dynamic_library.h"
12
13#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/renderer_vulkan/vk_blit_screen.h"
14#include "video_core/renderer_vulkan/vk_rasterizer.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_state_tracker.h"
17#include "video_core/renderer_vulkan/vk_swapchain.h"
18#include "video_core/vulkan_common/vulkan_device.h"
19#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 20#include "video_core/vulkan_common/vulkan_wrapper.h"
15 21
16namespace Core { 22namespace Core {
@@ -27,20 +33,6 @@ class GPU;
27 33
28namespace Vulkan { 34namespace Vulkan {
29 35
30class Device;
31class StateTracker;
32class MemoryAllocator;
33class VKBlitScreen;
34class VKSwapchain;
35class VKScheduler;
36
37struct VKScreenInfo {
38 VkImageView image_view{};
39 u32 width{};
40 u32 height{};
41 bool is_srgb{};
42};
43
44class RendererVulkan final : public VideoCore::RendererBase { 36class RendererVulkan final : public VideoCore::RendererBase {
45public: 37public:
46 explicit RendererVulkan(Core::TelemetrySession& telemtry_session, 38 explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -49,15 +41,13 @@ public:
49 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 41 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
50 ~RendererVulkan() override; 42 ~RendererVulkan() override;
51 43
52 bool Init() override;
53 void ShutDown() override;
54 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 44 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
55 45
56 static std::vector<std::string> EnumerateDevices(); 46 VideoCore::RasterizerInterface* ReadRasterizer() override {
47 return &rasterizer;
48 }
57 49
58private: 50private:
59 void InitializeDevice();
60
61 void Report() const; 51 void Report() const;
62 52
63 Core::TelemetrySession& telemetry_session; 53 Core::TelemetrySession& telemetry_session;
@@ -68,18 +58,18 @@ private:
68 vk::InstanceDispatch dld; 58 vk::InstanceDispatch dld;
69 59
70 vk::Instance instance; 60 vk::Instance instance;
71 61 vk::DebugUtilsMessenger debug_callback;
72 vk::SurfaceKHR surface; 62 vk::SurfaceKHR surface;
73 63
74 VKScreenInfo screen_info; 64 VKScreenInfo screen_info;
75 65
76 vk::DebugUtilsMessenger debug_callback; 66 Device device;
77 std::unique_ptr<Device> device; 67 MemoryAllocator memory_allocator;
78 std::unique_ptr<MemoryAllocator> memory_allocator; 68 StateTracker state_tracker;
79 std::unique_ptr<StateTracker> state_tracker; 69 VKScheduler scheduler;
80 std::unique_ptr<VKScheduler> scheduler; 70 VKSwapchain swapchain;
81 std::unique_ptr<VKSwapchain> swapchain; 71 VKBlitScreen blit_screen;
82 std::unique_ptr<VKBlitScreen> blit_screen; 72 RasterizerVulkan rasterizer;
83}; 73};
84 74
85} // namespace Vulkan 75} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 3e3b895e0..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -18,7 +18,6 @@
18#include "video_core/gpu.h" 18#include "video_core/gpu.h"
19#include "video_core/host_shaders/vulkan_present_frag_spv.h" 19#include "video_core/host_shaders/vulkan_present_frag_spv.h"
20#include "video_core/host_shaders/vulkan_present_vert_spv.h" 20#include "video_core/host_shaders/vulkan_present_vert_spv.h"
21#include "video_core/rasterizer_interface.h"
22#include "video_core/renderer_vulkan/renderer_vulkan.h" 21#include "video_core/renderer_vulkan/renderer_vulkan.h"
23#include "video_core/renderer_vulkan/vk_blit_screen.h" 22#include "video_core/renderer_vulkan/vk_blit_screen.h"
24#include "video_core/renderer_vulkan/vk_master_semaphore.h" 23#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
113}; 112};
114 113
115VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, 114VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
116 Core::Frontend::EmuWindow& render_window_, 115 Core::Frontend::EmuWindow& render_window_, const Device& device_,
117 VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
118 MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, 116 MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
119 VKScheduler& scheduler_, const VKScreenInfo& screen_info_) 117 VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
120 : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, 118 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
121 device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, 119 memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
122 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { 120 image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
123 resource_ticks.resize(image_count); 121 resource_ticks.resize(image_count);
124 122
125 CreateStaticResources(); 123 CreateStaticResources();
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
150 SetUniformData(data, framebuffer); 148 SetUniformData(data, framebuffer);
151 SetVertexData(data, framebuffer); 149 SetVertexData(data, framebuffer);
152 150
153 const std::span<u8> map = buffer_commit.Map(); 151 const std::span<u8> mapped_span = buffer_commit.Map();
154 std::memcpy(map.data(), &data, sizeof(data)); 152 std::memcpy(mapped_span.data(), &data, sizeof(data));
155 153
156 if (!use_accelerated) { 154 if (!use_accelerated) {
157 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 155 const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
159 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 157 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
160 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 158 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
161 const size_t size_bytes = GetSizeInBytes(framebuffer); 159 const size_t size_bytes = GetSizeInBytes(framebuffer);
162 rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
163 160
164 // TODO(Rodrigo): Read this from HLE 161 // TODO(Rodrigo): Read this from HLE
165 constexpr u32 block_height_log2 = 4; 162 constexpr u32 block_height_log2 = 4;
166 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); 163 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
167 Tegra::Texture::UnswizzleTexture( 164 Tegra::Texture::UnswizzleTexture(
168 map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, 165 mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
169 framebuffer.width, framebuffer.height, 1, block_height_log2, 0); 166 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
170 167
171 const VkBufferImageCopy copy{ 168 const VkBufferImageCopy copy{
172 .bufferOffset = image_offset, 169 .bufferOffset = image_offset,
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
266 cmdbuf.Draw(4, 1, 0, 0); 263 cmdbuf.Draw(4, 1, 0, 0);
267 cmdbuf.EndRenderPass(); 264 cmdbuf.EndRenderPass();
268 }); 265 });
269
270 return *semaphores[image_index]; 266 return *semaphores[image_index];
271} 267}
272 268
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index b52576957..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -38,12 +38,18 @@ class RasterizerVulkan;
38class VKScheduler; 38class VKScheduler;
39class VKSwapchain; 39class VKSwapchain;
40 40
41class VKBlitScreen final { 41struct VKScreenInfo {
42 VkImageView image_view{};
43 u32 width{};
44 u32 height{};
45 bool is_srgb{};
46};
47
48class VKBlitScreen {
42public: 49public:
43 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, 50 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
44 Core::Frontend::EmuWindow& render_window, 51 Core::Frontend::EmuWindow& render_window, const Device& device,
45 VideoCore::RasterizerInterface& rasterizer, const Device& device, 52 MemoryAllocator& memory_manager, VKSwapchain& swapchain,
46 MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
47 VKScheduler& scheduler, const VKScreenInfo& screen_info); 53 VKScheduler& scheduler, const VKScreenInfo& screen_info);
48 ~VKBlitScreen(); 54 ~VKBlitScreen();
49 55
@@ -84,7 +90,6 @@ private:
84 90
85 Core::Memory::Memory& cpu_memory; 91 Core::Memory::Memory& cpu_memory;
86 Core::Frontend::EmuWindow& render_window; 92 Core::Frontend::EmuWindow& render_window;
87 VideoCore::RasterizerInterface& rasterizer;
88 const Device& device; 93 const Device& device;
89 MemoryAllocator& memory_allocator; 94 MemoryAllocator& memory_allocator;
90 VKSwapchain& swapchain; 95 VKSwapchain& swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..848eedd66 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,308 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstring> 7#include <cstring>
7#include <memory> 8#include <span>
9#include <vector>
8 10
9#include "core/core.h"
10#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h" 13#include "video_core/renderer_vulkan/vk_buffer_cache.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
13#include "video_core/renderer_vulkan/vk_stream_buffer.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
14#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
16 20
17namespace Vulkan { 21namespace Vulkan {
18
19namespace { 22namespace {
23VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
24 return VkBufferCopy{
25 .srcOffset = copy.src_offset,
26 .dstOffset = copy.dst_offset,
27 .size = copy.size,
28 };
29}
20 30
21constexpr VkBufferUsageFlags BUFFER_USAGE = 31VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
22 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | 32 if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
23 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; 33 return VK_INDEX_TYPE_UINT8_EXT;
24 34 }
25constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = 35 if (num_elements <= 0xffff) {
26 VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 36 return VK_INDEX_TYPE_UINT16;
27 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 37 }
28 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; 38 return VK_INDEX_TYPE_UINT32;
29 39}
30constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
33 40
34constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = 41size_t BytesPerIndex(VkIndexType index_type) {
35 VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; 42 switch (index_type) {
43 case VK_INDEX_TYPE_UINT8_EXT:
44 return 1;
45 case VK_INDEX_TYPE_UINT16:
46 return 2;
47 case VK_INDEX_TYPE_UINT32:
48 return 4;
49 default:
50 UNREACHABLE_MSG("Invalid index type={}", index_type);
51 return 1;
52 }
53}
36 54
55template <typename T>
56std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
57 std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
58 std::ranges::transform(indices, indices.begin(),
59 [quad, first](u32 index) { return first + index + quad * 4; });
60 return indices;
61}
37} // Anonymous namespace 62} // Anonymous namespace
38 63
39Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, 64Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
40 StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) 65 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
41 : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ 66
42 staging_pool_} { 67Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
43 buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 68 VAddr cpu_addr_, u64 size_bytes_)
69 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
70 buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
44 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 71 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
45 .pNext = nullptr, 72 .pNext = nullptr,
46 .flags = 0, 73 .flags = 0,
47 .size = static_cast<VkDeviceSize>(size_), 74 .size = SizeBytes(),
48 .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 75 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
76 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
77 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
78 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
79 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
49 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 80 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
50 .queueFamilyIndexCount = 0, 81 .queueFamilyIndexCount = 0,
51 .pQueueFamilyIndices = nullptr, 82 .pQueueFamilyIndices = nullptr,
52 }); 83 });
53 commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); 84 if (runtime.device.HasDebuggingToolAttached()) {
85 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
86 }
87 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
54} 88}
55 89
56Buffer::~Buffer() = default; 90BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
91 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 VKDescriptorPool& descriptor_pool)
94 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
95 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
96 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
97 quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
57 98
58void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { 99StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
59 const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); 100 return staging_pool.Request(size, MemoryUsage::Upload);
60 std::memcpy(staging.mapped_span.data(), data, data_size); 101}
61 102
62 scheduler.RequestOutsideRenderPassOperationContext(); 103StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
104 return staging_pool.Request(size, MemoryUsage::Download);
105}
63 106
64 const VkBuffer handle = Handle(); 107void BufferCacheRuntime::Finish() {
65 scheduler.Record([staging = staging.buffer, handle, offset, data_size, 108 scheduler.Finish();
66 &device = device](vk::CommandBuffer cmdbuf) { 109}
67 const VkBufferMemoryBarrier read_barrier{ 110
68 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 111void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
69 .pNext = nullptr, 112 std::span<const VideoCommon::BufferCopy> copies) {
70 .srcAccessMask = 113 static constexpr VkMemoryBarrier READ_BARRIER{
71 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | 114 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
72 VK_ACCESS_HOST_WRITE_BIT | 115 .pNext = nullptr,
73 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), 116 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
74 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 117 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
75 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 118 };
76 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 119 static constexpr VkMemoryBarrier WRITE_BARRIER{
77 .buffer = handle, 120 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
78 .offset = offset, 121 .pNext = nullptr,
79 .size = data_size, 122 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
80 }; 123 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
81 const VkBufferMemoryBarrier write_barrier{ 124 };
82 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 125 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
83 .pNext = nullptr, 126 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
84 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 127 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
85 .dstAccessMask = UPLOAD_ACCESS_BARRIERS, 128 scheduler.RequestOutsideRenderPassOperationContext();
86 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 129 scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
87 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
88 .buffer = handle,
89 .offset = offset,
90 .size = data_size,
91 };
92 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 130 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
93 0, read_barrier); 131 0, READ_BARRIER);
94 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); 132 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
95 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, 133 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
96 write_barrier); 134 0, WRITE_BARRIER);
97 }); 135 });
98} 136}
99 137
100void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { 138void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
101 auto staging = staging_pool.Request(data_size, MemoryUsage::Download); 139 u32 base_vertex, u32 num_indices, VkBuffer buffer,
102 scheduler.RequestOutsideRenderPassOperationContext(); 140 u32 offset, [[maybe_unused]] u32 size) {
141 VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
142 VkDeviceSize vk_offset = offset;
143 VkBuffer vk_buffer = buffer;
144 if (topology == PrimitiveTopology::Quads) {
145 vk_index_type = VK_INDEX_TYPE_UINT32;
146 std::tie(vk_buffer, vk_offset) =
147 quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
148 } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
149 vk_index_type = VK_INDEX_TYPE_UINT16;
150 std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
151 }
152 if (vk_buffer == VK_NULL_HANDLE) {
153 // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
154 ReserveNullIndexBuffer();
155 vk_buffer = *null_index_buffer;
156 }
157 scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
158 cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
159 });
160}
103 161
104 const VkBuffer handle = Handle(); 162void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
105 scheduler.Record( 163 ReserveQuadArrayLUT(first + count, true);
106 [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
107 const VkBufferMemoryBarrier barrier{
108 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
109 .pNext = nullptr,
110 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
111 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
112 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
113 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
114 .buffer = handle,
115 .offset = offset,
116 .size = data_size,
117 };
118
119 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
120 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
121 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
122 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
123 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
124 });
125 scheduler.Finish();
126 164
127 std::memcpy(data, staging.mapped_span.data(), data_size); 165 // The LUT has the indices 0, 1, 2, and 3 copied as an array
166 // To apply these 'first' offsets we can apply an offset based on the modulus.
167 const VkIndexType index_type = quad_array_lut_index_type;
168 const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
169 const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
170 scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
171 cmdbuf.BindIndexBuffer(buffer, offset, index_type);
172 });
128} 173}
129 174
130void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 175void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
131 std::size_t copy_size) { 176 u32 stride) {
132 scheduler.RequestOutsideRenderPassOperationContext(); 177 if (device.IsExtExtendedDynamicStateSupported()) {
178 scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
179 const VkDeviceSize vk_offset = offset;
180 const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
181 const VkDeviceSize vk_stride = stride;
182 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
183 });
184 } else {
185 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
186 cmdbuf.BindVertexBuffer(index, buffer, offset);
187 });
188 }
189}
133 190
134 const VkBuffer dst_buffer = Handle(); 191void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
135 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, 192 u32 size) {
136 copy_size](vk::CommandBuffer cmdbuf) { 193 if (!device.IsExtTransformFeedbackSupported()) {
137 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); 194 // Already logged in the rasterizer
138 195 return;
139 std::array<VkBufferMemoryBarrier, 2> barriers; 196 }
140 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 197 scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
141 barriers[0].pNext = nullptr; 198 const VkDeviceSize vk_offset = offset;
142 barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 199 const VkDeviceSize vk_size = size;
143 barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 200 cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
144 barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
145 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
146 barriers[0].buffer = src_buffer;
147 barriers[0].offset = src_offset;
148 barriers[0].size = copy_size;
149 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
150 barriers[1].pNext = nullptr;
151 barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
152 barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
153 barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
154 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
155 barriers[1].buffer = dst_buffer;
156 barriers[1].offset = dst_offset;
157 barriers[1].size = copy_size;
158 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
159 barriers, {});
160 }); 201 });
161} 202}
162 203
163VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, 204void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
164 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 205 update_descriptor_queue.AddBuffer(buffer, offset, size);
165 const Device& device_, MemoryAllocator& memory_allocator_, 206}
166 VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
167 StagingBufferPool& staging_pool_)
168 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
169 cpu_memory_, stream_buffer_},
170 device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
171 staging_pool{staging_pool_} {}
172 207
173VKBufferCache::~VKBufferCache() = default; 208void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
209 if (num_indices <= current_num_indices) {
210 return;
211 }
212 if (wait_for_idle) {
213 scheduler.Finish();
214 }
215 current_num_indices = num_indices;
216 quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
174 217
175std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 218 const u32 num_quads = num_indices / 4;
176 return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, 219 const u32 num_triangle_indices = num_quads * 6;
177 size); 220 const u32 num_first_offset_copies = 4;
221 const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
222 const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
223 quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
224 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
225 .pNext = nullptr,
226 .flags = 0,
227 .size = size_bytes,
228 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
229 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
230 .queueFamilyIndexCount = 0,
231 .pQueueFamilyIndices = nullptr,
232 });
233 if (device.HasDebuggingToolAttached()) {
234 quad_array_lut.SetObjectNameEXT("Quad LUT");
235 }
236 quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
237
238 const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
239 u8* staging_data = staging.mapped_span.data();
240 const size_t quad_size = bytes_per_index * 6;
241 for (u32 first = 0; first < num_first_offset_copies; ++first) {
242 for (u32 quad = 0; quad < num_quads; ++quad) {
243 switch (quad_array_lut_index_type) {
244 case VK_INDEX_TYPE_UINT8_EXT:
245 std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
246 break;
247 case VK_INDEX_TYPE_UINT16:
248 std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
249 break;
250 case VK_INDEX_TYPE_UINT32:
251 std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
252 break;
253 default:
254 UNREACHABLE();
255 break;
256 }
257 staging_data += quad_size;
258 }
259 }
260 scheduler.RequestOutsideRenderPassOperationContext();
261 scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
262 dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
263 const VkBufferCopy copy{
264 .srcOffset = src_offset,
265 .dstOffset = 0,
266 .size = size_bytes,
267 };
268 const VkBufferMemoryBarrier write_barrier{
269 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
270 .pNext = nullptr,
271 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
272 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
273 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
274 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
275 .buffer = dst_buffer,
276 .offset = 0,
277 .size = size_bytes,
278 };
279 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
280 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
281 0, write_barrier);
282 });
178} 283}
179 284
180VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { 285void BufferCacheRuntime::ReserveNullIndexBuffer() {
181 size = std::max(size, std::size_t(4)); 286 if (null_index_buffer) {
182 const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); 287 return;
288 }
289 null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
290 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
291 .pNext = nullptr,
292 .flags = 0,
293 .size = 4,
294 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
295 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
296 .queueFamilyIndexCount = 0,
297 .pQueueFamilyIndices = nullptr,
298 });
299 if (device.HasDebuggingToolAttached()) {
300 null_index_buffer.SetObjectNameEXT("Null index buffer");
301 }
302 null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
303
183 scheduler.RequestOutsideRenderPassOperationContext(); 304 scheduler.RequestOutsideRenderPassOperationContext();
184 scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { 305 scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
185 cmdbuf.FillBuffer(buffer, 0, size, 0); 306 cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
186 }); 307 });
187 return {empty.buffer, 0, 0};
188} 308}
189 309
190} // namespace Vulkan 310} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..041e6515c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,124 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
8
9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 7#include "video_core/buffer_cache/buffer_cache.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/vk_compute_pass.h"
11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
12#include "video_core/renderer_vulkan/vk_stream_buffer.h"
13#include "video_core/vulkan_common/vulkan_memory_allocator.h" 11#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 12#include "video_core/vulkan_common/vulkan_wrapper.h"
15 13
16namespace Vulkan { 14namespace Vulkan {
17 15
18class Device; 16class Device;
17class VKDescriptorPool;
19class VKScheduler; 18class VKScheduler;
19class VKUpdateDescriptorQueue;
20 20
21class Buffer final : public VideoCommon::BufferBlock { 21class BufferCacheRuntime;
22public:
23 explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
24 StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
25 ~Buffer();
26
27 void Upload(std::size_t offset, std::size_t data_size, const u8* data);
28 22
29 void Download(std::size_t offset, std::size_t data_size, u8* data); 23class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
30 24public:
31 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 25 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
32 std::size_t copy_size); 26 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27 VAddr cpu_addr_, u64 size_bytes_);
33 28
34 VkBuffer Handle() const { 29 [[nodiscard]] VkBuffer Handle() const noexcept {
35 return *buffer; 30 return *buffer;
36 } 31 }
37 32
38 u64 Address() const { 33 operator VkBuffer() const noexcept {
39 return 0; 34 return *buffer;
40 } 35 }
41 36
42private: 37private:
43 const Device& device;
44 VKScheduler& scheduler;
45 StagingBufferPool& staging_pool;
46
47 vk::Buffer buffer; 38 vk::Buffer buffer;
48 MemoryCommit commit; 39 MemoryCommit commit;
49}; 40};
50 41
51class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { 42class BufferCacheRuntime {
43 friend Buffer;
44
45 using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
46 using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
47
52public: 48public:
53 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, 49 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
54 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 50 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
55 const Device& device, MemoryAllocator& memory_allocator, 51 VKUpdateDescriptorQueue& update_descriptor_queue_,
56 VKScheduler& scheduler, VKStreamBuffer& stream_buffer, 52 VKDescriptorPool& descriptor_pool);
57 StagingBufferPool& staging_pool); 53
58 ~VKBufferCache(); 54 void Finish();
55
56 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
57
58 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
59
60 void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
61 std::span<const VideoCommon::BufferCopy> copies);
62
63 void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
64 u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
59 65
60 BufferInfo GetEmptyBuffer(std::size_t size) override; 66 void BindQuadArrayIndexBuffer(u32 first, u32 count);
61 67
62protected: 68 void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
63 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 69
70 void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
71
72 std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
73 [[maybe_unused]] u32 binding_index, u32 size) {
74 const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
75 BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
76 return ref.mapped_span;
77 }
78
79 void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
80 BindBuffer(buffer, offset, size);
81 }
82
83 void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
84 [[maybe_unused]] bool is_written) {
85 BindBuffer(buffer, offset, size);
86 }
64 87
65private: 88private:
89 void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
90
91 void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
92
93 void ReserveNullIndexBuffer();
94
66 const Device& device; 95 const Device& device;
67 MemoryAllocator& memory_allocator; 96 MemoryAllocator& memory_allocator;
68 VKScheduler& scheduler; 97 VKScheduler& scheduler;
69 StagingBufferPool& staging_pool; 98 StagingBufferPool& staging_pool;
99 VKUpdateDescriptorQueue& update_descriptor_queue;
100
101 vk::Buffer quad_array_lut;
102 MemoryCommit quad_array_lut_commit;
103 VkIndexType quad_array_lut_index_type{};
104 u32 current_num_indices = 0;
105
106 vk::Buffer null_index_buffer;
107 MemoryCommit null_index_buffer_commit;
108
109 Uint8Pass uint8_pass;
110 QuadIndexedPass quad_index_pass;
70}; 111};
71 112
113struct BufferCacheParams {
114 using Runtime = Vulkan::BufferCacheRuntime;
115 using Buffer = Vulkan::Buffer;
116
117 static constexpr bool IS_OPENGL = false;
118 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
119 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
120 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
121 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
122 static constexpr bool USE_MEMORY_MAPS = true;
123};
124
125using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
126
72} // namespace Vulkan 127} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,7 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" 13#include "common/div_ceil.h"
14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +22,7 @@
22#include "video_core/vulkan_common/vulkan_wrapper.h" 22#include "video_core/vulkan_common/vulkan_wrapper.h"
23 23
24namespace Vulkan { 24namespace Vulkan {
25
26namespace { 25namespace {
27
28VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
29 return {
30 .binding = 0,
31 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
32 .descriptorCount = 1,
33 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
34 .pImmutableSamplers = nullptr,
35 };
36}
37
38VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
39 return {
40 .dstBinding = 0,
41 .dstArrayElement = 0,
42 .descriptorCount = 1,
43 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
44 .offset = 0,
45 .stride = sizeof(DescriptorUpdateEntry),
46 };
47}
48
49VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 26VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
50 return { 27 return {
51 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 28 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
162 return set; 139 return set;
163} 140}
164 141
165QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
166 VKDescriptorPool& descriptor_pool_,
167 StagingBufferPool& staging_buffer_pool_,
168 VKUpdateDescriptorQueue& update_descriptor_queue_)
169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
173 update_descriptor_queue{update_descriptor_queue_} {}
174
175QuadArrayPass::~QuadArrayPass() = default;
176
177std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
178 const u32 num_triangle_vertices = (num_vertices / 4) * 6;
179 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
180 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
181
182 update_descriptor_queue.Acquire();
183 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
184 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
185
186 scheduler.RequestOutsideRenderPassOperationContext();
187
188 ASSERT(num_vertices % 4 == 0);
189 const u32 num_quads = num_vertices / 4;
190 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
191 num_quads, first, set](vk::CommandBuffer cmdbuf) {
192 constexpr u32 dispatch_size = 1024;
193 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
195 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
196 cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
197
198 VkBufferMemoryBarrier barrier;
199 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
200 barrier.pNext = nullptr;
201 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
202 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
203 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
204 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
205 barrier.buffer = buffer;
206 barrier.offset = 0;
207 barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
208 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
209 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
210 });
211 return {staging_ref.buffer, 0};
212}
213
214Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, 142Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
215 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, 143 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
216 VKUpdateDescriptorQueue& update_descriptor_queue_) 144 VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
221 149
222Uint8Pass::~Uint8Pass() = default; 150Uint8Pass::~Uint8Pass() = default;
223 151
224std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, 152std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
225 u64 src_offset) { 153 u32 src_offset) {
226 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); 154 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
227 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 155 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
228 156
229 update_descriptor_queue.Acquire(); 157 update_descriptor_queue.Acquire();
230 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 158 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
231 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); 159 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
232 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 160 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
233 161
234 scheduler.RequestOutsideRenderPassOperationContext(); 162 scheduler.RequestOutsideRenderPassOperationContext();
235 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, 163 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
236 num_vertices](vk::CommandBuffer cmdbuf) { 164 num_vertices](vk::CommandBuffer cmdbuf) {
237 constexpr u32 dispatch_size = 1024; 165 static constexpr u32 DISPATCH_SIZE = 1024;
166 static constexpr VkMemoryBarrier WRITE_BARRIER{
167 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
168 .pNext = nullptr,
169 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
170 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
171 };
238 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 172 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
239 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 173 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
240 cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); 174 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
241
242 VkBufferMemoryBarrier barrier;
243 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
244 barrier.pNext = nullptr;
245 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
246 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
247 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
248 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
249 barrier.buffer = buffer;
250 barrier.offset = 0;
251 barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
252 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 175 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
253 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 176 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
254 }); 177 });
255 return {staging_ref.buffer, 0}; 178 return {staging.buffer, staging.offset};
256} 179}
257 180
258QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 181QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
267 190
268QuadIndexedPass::~QuadIndexedPass() = default; 191QuadIndexedPass::~QuadIndexedPass() = default;
269 192
270std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( 193std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
271 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, 194 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
272 VkBuffer src_buffer, u64 src_offset) { 195 VkBuffer src_buffer, u32 src_offset) {
273 const u32 index_shift = [index_format] { 196 const u32 index_shift = [index_format] {
274 switch (index_format) { 197 switch (index_format) {
275 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: 198 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
286 const u32 num_tri_vertices = (num_vertices / 4) * 6; 209 const u32 num_tri_vertices = (num_vertices / 4) * 6;
287 210
288 const std::size_t staging_size = num_tri_vertices * sizeof(u32); 211 const std::size_t staging_size = num_tri_vertices * sizeof(u32);
289 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 212 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
290 213
291 update_descriptor_queue.Acquire(); 214 update_descriptor_queue.Acquire();
292 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 215 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
293 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); 216 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
294 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 217 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
295 218
296 scheduler.RequestOutsideRenderPassOperationContext(); 219 scheduler.RequestOutsideRenderPassOperationContext();
297 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, 220 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
298 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 221 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
299 static constexpr u32 dispatch_size = 1024; 222 static constexpr u32 DISPATCH_SIZE = 1024;
223 static constexpr VkMemoryBarrier WRITE_BARRIER{
224 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
225 .pNext = nullptr,
226 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
227 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
228 };
300 const std::array push_constants = {base_vertex, index_shift}; 229 const std::array push_constants = {base_vertex, index_shift};
301 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 230 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
302 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 231 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
303 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 232 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
304 &push_constants); 233 &push_constants);
305 cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); 234 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
306
307 VkBufferMemoryBarrier barrier;
308 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
309 barrier.pNext = nullptr;
310 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
311 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
312 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
313 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
314 barrier.buffer = buffer;
315 barrier.offset = 0;
316 barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
317 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 235 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
318 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 236 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
319 }); 237 });
320 return {staging_ref.buffer, 0}; 238 return {staging.buffer, staging.offset};
321} 239}
322 240
323} // namespace Vulkan 241} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
41 vk::ShaderModule module; 41 vk::ShaderModule module;
42}; 42};
43 43
44class QuadArrayPass final : public VKComputePass {
45public:
46 explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
47 VKDescriptorPool& descriptor_pool_,
48 StagingBufferPool& staging_buffer_pool_,
49 VKUpdateDescriptorQueue& update_descriptor_queue_);
50 ~QuadArrayPass();
51
52 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
53
54private:
55 VKScheduler& scheduler;
56 StagingBufferPool& staging_buffer_pool;
57 VKUpdateDescriptorQueue& update_descriptor_queue;
58};
59
60class Uint8Pass final : public VKComputePass { 44class Uint8Pass final : public VKComputePass {
61public: 45public:
62 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 46 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,10 @@ public:
64 VKUpdateDescriptorQueue& update_descriptor_queue_); 48 VKUpdateDescriptorQueue& update_descriptor_queue_);
65 ~Uint8Pass(); 49 ~Uint8Pass();
66 50
67 std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); 51 /// Assemble uint8 indices into an uint16 index buffer
52 /// Returns a pair with the staging buffer, and the offset where the assembled data is
53 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
54 u32 src_offset);
68 55
69private: 56private:
70 VKScheduler& scheduler; 57 VKScheduler& scheduler;
@@ -80,9 +67,9 @@ public:
80 VKUpdateDescriptorQueue& update_descriptor_queue_); 67 VKUpdateDescriptorQueue& update_descriptor_queue_);
81 ~QuadIndexedPass(); 68 ~QuadIndexedPass();
82 69
83 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, 70 std::pair<VkBuffer, VkDeviceSize> Assemble(
84 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, 71 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
85 u64 src_offset); 72 u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
86 73
87private: 74private:
88 VKScheduler& scheduler; 75 VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
45} 45}
46 46
47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
48 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 48 TextureCache& texture_cache_, BufferCache& buffer_cache_,
49 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 49 VKQueryCache& query_cache_, const Device& device_,
50 VKScheduler& scheduler_) 50 VKScheduler& scheduler_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
52 scheduler{scheduler_} {} 52 scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
22namespace Vulkan { 22namespace Vulkan {
23 23
24class Device; 24class Device;
25class VKBufferCache;
26class VKQueryCache; 25class VKQueryCache;
27class VKScheduler; 26class VKScheduler;
28 27
@@ -45,14 +44,14 @@ private:
45using Fence = std::shared_ptr<InnerFence>; 44using Fence = std::shared_ptr<InnerFence>;
46 45
47using GenericFenceManager = 46using GenericFenceManager =
48 VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; 47 VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
49 48
50class VKFenceManager final : public GenericFenceManager { 49class VKFenceManager final : public GenericFenceManager {
51public: 50public:
52 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 51 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
53 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 52 TextureCache& texture_cache, BufferCache& buffer_cache,
54 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 53 VKQueryCache& query_cache, const Device& device,
55 VKScheduler& scheduler_); 54 VKScheduler& scheduler);
56 55
57protected: 56protected:
58 Fence CreateFence(u32 value, bool is_stubbed) override; 57 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d50dca604..fc6dd83eb 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
221 std::vector<VkVertexInputBindingDescription> vertex_bindings; 221 std::vector<VkVertexInputBindingDescription> vertex_bindings;
222 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; 222 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
223 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 223 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
224 if (state.attributes[index].binding_index_enabled == 0) {
225 continue;
226 }
227 const bool instanced = state.binding_divisors[index] != 0; 224 const bool instanced = state.binding_divisors[index] != 0;
228 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; 225 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
229 vertex_bindings.push_back({ 226 vertex_bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index f336f1862..2c7ed654d 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -21,7 +21,12 @@ public:
21 21
22 /// Returns the current logical tick. 22 /// Returns the current logical tick.
23 [[nodiscard]] u64 CurrentTick() const noexcept { 23 [[nodiscard]] u64 CurrentTick() const noexcept {
24 return current_tick; 24 return current_tick.load(std::memory_order_relaxed);
25 }
26
27 /// Returns the last known GPU tick.
28 [[nodiscard]] u64 KnownGpuTick() const noexcept {
29 return gpu_tick.load(std::memory_order_relaxed);
25 } 30 }
26 31
27 /// Returns the timeline semaphore handle. 32 /// Returns the timeline semaphore handle.
@@ -31,7 +36,7 @@ public:
31 36
32 /// Returns true when a tick has been hit by the GPU. 37 /// Returns true when a tick has been hit by the GPU.
33 [[nodiscard]] bool IsFree(u64 tick) { 38 [[nodiscard]] bool IsFree(u64 tick) {
34 return gpu_tick >= tick; 39 return gpu_tick.load(std::memory_order_relaxed) >= tick;
35 } 40 }
36 41
37 /// Advance to the logical tick. 42 /// Advance to the logical tick.
@@ -41,7 +46,7 @@ public:
41 46
42 /// Refresh the known GPU tick 47 /// Refresh the known GPU tick
43 void Refresh() { 48 void Refresh() {
44 gpu_tick = semaphore.GetCounter(); 49 gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed);
45 } 50 }
46 51
47 /// Waits for a tick to be hit on the GPU 52 /// Waits for a tick to be hit on the GPU
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..dfd38f575 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
8#include <mutex> 8#include <mutex>
9#include <vector> 9#include <vector>
10 10
11#include <boost/container/static_vector.hpp>
12
13#include "common/alignment.h" 11#include "common/alignment.h"
14#include "common/assert.h" 12#include "common/assert.h"
15#include "common/logging/log.h" 13#include "common/logging/log.h"
@@ -24,7 +22,6 @@
24#include "video_core/renderer_vulkan/maxwell_to_vk.h" 22#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h" 23#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); 48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); 49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
57MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
58MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
59MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
60 51
61namespace { 52namespace {
53struct DrawParams {
54 u32 base_instance;
55 u32 num_instances;
56 u32 base_vertex;
57 u32 num_vertices;
58 bool is_indexed;
59};
62 60
63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); 61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
64 62
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
67 const float width = src.scale_x * 2.0f; 65 const float width = src.scale_x * 2.0f;
68 const float height = src.scale_y * 2.0f; 66 const float height = src.scale_y * 2.0f;
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; 67 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
70
71 VkViewport viewport{ 68 VkViewport viewport{
72 .x = src.translate_x - src.scale_x, 69 .x = src.translate_x - src.scale_x,
73 .y = src.translate_y - src.scale_y, 70 .y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
76 .minDepth = src.translate_z - src.scale_z * reduce_z, 73 .minDepth = src.translate_z - src.scale_z * reduce_z,
77 .maxDepth = src.translate_z + src.scale_z, 74 .maxDepth = src.translate_z + src.scale_z,
78 }; 75 };
79
80 if (!device.IsExtDepthRangeUnrestrictedSupported()) { 76 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
81 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); 77 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
82 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); 78 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
83 } 79 }
84
85 return viewport; 80 return viewport;
86} 81}
87 82
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); 141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
147} 142}
148 143
149template <size_t N>
150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
151 std::array<VkDeviceSize, N> expanded;
152 std::copy(strides.begin(), strides.end(), expanded.begin());
153 return expanded;
154}
155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { 144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) { 145 if (entry.is_buffer) {
158 return ImageViewType::e2D; 146 return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
221 } 209 }
222} 210}
223 211
224} // Anonymous namespace 212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
225 213 bool is_indexed) {
226class BufferBindings final { 214 DrawParams params{
227public: 215 .base_instance = regs.vb_base_instance,
228 void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { 216 .num_instances = is_instanced ? num_instances : 1,
229 vertex.buffers[vertex.num_buffers] = buffer; 217 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
230 vertex.offsets[vertex.num_buffers] = offset; 218 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
231 vertex.sizes[vertex.num_buffers] = size; 219 .is_indexed = is_indexed,
232 vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); 220 };
233 ++vertex.num_buffers; 221 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
234 } 222 // 6 triangle vertices per quad, base vertex is part of the index
235 223 // See BindQuadArrayIndexBuffer for more details
236 void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { 224 params.num_vertices = (params.num_vertices / 4) * 6;
237 index.buffer = buffer; 225 params.base_vertex = 0;
238 index.offset = offset; 226 params.is_indexed = true;
239 index.type = type;
240 }
241
242 void Bind(const Device& device, VKScheduler& scheduler) const {
243 // Use this large switch case to avoid dispatching more memory in the record lambda than
244 // what we need. It looks horrible, but it's the best we can do on standard C++.
245 switch (vertex.num_buffers) {
246 case 0:
247 return BindStatic<0>(device, scheduler);
248 case 1:
249 return BindStatic<1>(device, scheduler);
250 case 2:
251 return BindStatic<2>(device, scheduler);
252 case 3:
253 return BindStatic<3>(device, scheduler);
254 case 4:
255 return BindStatic<4>(device, scheduler);
256 case 5:
257 return BindStatic<5>(device, scheduler);
258 case 6:
259 return BindStatic<6>(device, scheduler);
260 case 7:
261 return BindStatic<7>(device, scheduler);
262 case 8:
263 return BindStatic<8>(device, scheduler);
264 case 9:
265 return BindStatic<9>(device, scheduler);
266 case 10:
267 return BindStatic<10>(device, scheduler);
268 case 11:
269 return BindStatic<11>(device, scheduler);
270 case 12:
271 return BindStatic<12>(device, scheduler);
272 case 13:
273 return BindStatic<13>(device, scheduler);
274 case 14:
275 return BindStatic<14>(device, scheduler);
276 case 15:
277 return BindStatic<15>(device, scheduler);
278 case 16:
279 return BindStatic<16>(device, scheduler);
280 case 17:
281 return BindStatic<17>(device, scheduler);
282 case 18:
283 return BindStatic<18>(device, scheduler);
284 case 19:
285 return BindStatic<19>(device, scheduler);
286 case 20:
287 return BindStatic<20>(device, scheduler);
288 case 21:
289 return BindStatic<21>(device, scheduler);
290 case 22:
291 return BindStatic<22>(device, scheduler);
292 case 23:
293 return BindStatic<23>(device, scheduler);
294 case 24:
295 return BindStatic<24>(device, scheduler);
296 case 25:
297 return BindStatic<25>(device, scheduler);
298 case 26:
299 return BindStatic<26>(device, scheduler);
300 case 27:
301 return BindStatic<27>(device, scheduler);
302 case 28:
303 return BindStatic<28>(device, scheduler);
304 case 29:
305 return BindStatic<29>(device, scheduler);
306 case 30:
307 return BindStatic<30>(device, scheduler);
308 case 31:
309 return BindStatic<31>(device, scheduler);
310 case 32:
311 return BindStatic<32>(device, scheduler);
312 }
313 UNREACHABLE();
314 }
315
316private:
317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
318 struct {
319 size_t num_buffers = 0;
320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
323 std::array<u16, Maxwell::NumVertexArrays> strides;
324 } vertex;
325
326 struct {
327 VkBuffer buffer = nullptr;
328 VkDeviceSize offset;
329 VkIndexType type;
330 } index;
331
332 template <size_t N>
333 void BindStatic(const Device& device, VKScheduler& scheduler) const {
334 if (device.IsExtExtendedDynamicStateSupported()) {
335 if (index.buffer) {
336 BindStatic<N, true, true>(scheduler);
337 } else {
338 BindStatic<N, false, true>(scheduler);
339 }
340 } else {
341 if (index.buffer) {
342 BindStatic<N, true, false>(scheduler);
343 } else {
344 BindStatic<N, false, false>(scheduler);
345 }
346 }
347 }
348
349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
350 void BindStatic(VKScheduler& scheduler) const {
351 static_assert(N <= Maxwell::NumVertexArrays);
352 if constexpr (N == 0) {
353 return;
354 }
355
356 std::array<VkBuffer, N> buffers;
357 std::array<VkDeviceSize, N> offsets;
358 std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
359 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
360
361 if constexpr (has_extended_dynamic_state) {
362 // With extended dynamic states we can specify the length and stride of a vertex buffer
363 std::array<VkDeviceSize, N> sizes;
364 std::array<u16, N> strides;
365 std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
366 std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
367
368 if constexpr (is_indexed) {
369 scheduler.Record(
370 [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
371 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
372 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
373 offsets.data(), sizes.data(),
374 ExpandStrides(strides).data());
375 });
376 } else {
377 scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
378 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
379 offsets.data(), sizes.data(),
380 ExpandStrides(strides).data());
381 });
382 }
383 return;
384 }
385
386 if constexpr (is_indexed) {
387 // Indexed draw
388 scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
389 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
390 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
391 });
392 } else {
393 // Array draw
394 scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
395 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
396 });
397 }
398 }
399};
400
401void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
402 if (is_indexed) {
403 cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
404 } else {
405 cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
406 } 227 }
228 return params;
407} 229}
230} // Anonymous namespace
408 231
409RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 232RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
410 Tegra::MemoryManager& gpu_memory_, 233 Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, 237 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, 238 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
416 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, 239 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
417 state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), 240 state_tracker{state_tracker_}, scheduler{scheduler_},
418 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), 241 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
419 update_descriptor_queue(device, scheduler), 242 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool), 243 blit_image(device, scheduler, state_tracker, descriptor_pool),
421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
424 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, 244 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 245 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
246 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
247 update_descriptor_queue, descriptor_pool),
248 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 249 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
427 descriptor_pool, update_descriptor_queue), 250 descriptor_pool, update_descriptor_queue),
428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
429 stream_buffer, staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, 251 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
431 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), 252 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
432 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 253 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
433 scheduler.SetQueryCache(query_cache); 254 scheduler.SetQueryCache(query_cache);
434 if (device.UseAsynchronousShaders()) { 255 if (device.UseAsynchronousShaders()) {
@@ -446,52 +267,51 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
446 267
447 query_cache.UpdateCounters(); 268 query_cache.UpdateCounters();
448 269
449 GraphicsPipelineCacheKey key; 270 graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
450 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
451
452 buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
453 271
454 BufferBindings buffer_bindings; 272 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
455 const DrawParameters draw_params =
456 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
457 273
458 auto lock = texture_cache.AcquireLock();
459 texture_cache.SynchronizeGraphicsDescriptors(); 274 texture_cache.SynchronizeGraphicsDescriptors();
460
461 texture_cache.UpdateRenderTargets(false); 275 texture_cache.UpdateRenderTargets(false);
462 276
463 const auto shaders = pipeline_cache.GetShaders(); 277 const auto shaders = pipeline_cache.GetShaders();
464 key.shaders = GetShaderAddresses(shaders); 278 graphics_key.shaders = GetShaderAddresses(shaders);
465 SetupShaderDescriptors(shaders);
466 279
467 buffer_cache.Unmap(); 280 SetupShaderDescriptors(shaders, is_indexed);
468 281
469 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 282 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
470 key.renderpass = framebuffer->RenderPass(); 283 graphics_key.renderpass = framebuffer->RenderPass();
471 284
472 auto* const pipeline = 285 VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
473 pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); 286 graphics_key, framebuffer->NumColorBuffers(), async_shaders);
474 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { 287 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
475 // Async graphics pipeline was not ready. 288 // Async graphics pipeline was not ready.
476 return; 289 return;
477 } 290 }
478 291
479 buffer_bindings.Bind(device, scheduler);
480
481 BeginTransformFeedback(); 292 BeginTransformFeedback();
482 293
483 scheduler.RequestRenderpass(framebuffer); 294 scheduler.RequestRenderpass(framebuffer);
484 scheduler.BindGraphicsPipeline(pipeline->GetHandle()); 295 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
485 UpdateDynamicStates(); 296 UpdateDynamicStates();
486 297
487 const auto pipeline_layout = pipeline->GetLayout(); 298 const auto& regs = maxwell3d.regs;
488 const auto descriptor_set = pipeline->CommitDescriptorSet(); 299 const u32 num_instances = maxwell3d.mme_draw.instance_count;
300 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
301 const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
302 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
489 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 303 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
490 if (descriptor_set) { 304 if (descriptor_set) {
491 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 305 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
492 DESCRIPTOR_SET, descriptor_set, {}); 306 DESCRIPTOR_SET, descriptor_set, nullptr);
307 }
308 if (draw_params.is_indexed) {
309 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
310 draw_params.base_vertex, draw_params.base_instance);
311 } else {
312 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance);
493 } 314 }
494 draw_params.Draw(cmdbuf);
495 }); 315 });
496 316
497 EndTransformFeedback(); 317 EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
515 return; 335 return;
516 } 336 }
517 337
518 auto lock = texture_cache.AcquireLock(); 338 std::scoped_lock lock{texture_cache.mutex};
519 texture_cache.UpdateRenderTargets(true); 339 texture_cache.UpdateRenderTargets(true);
520 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 340 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
521 const VkExtent2D render_area = framebuffer->RenderArea(); 341 const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
559 if (use_stencil) { 379 if (use_stencil) {
560 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; 380 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
561 } 381 }
562
563 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, 382 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
564 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { 383 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
565 VkClearAttachment attachment; 384 VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
580 auto& pipeline = pipeline_cache.GetComputePipeline({ 399 auto& pipeline = pipeline_cache.GetComputePipeline({
581 .shader = code_addr, 400 .shader = code_addr,
582 .shared_memory_size = launch_desc.shared_alloc, 401 .shared_memory_size = launch_desc.shared_alloc,
583 .workgroup_size = 402 .workgroup_size{
584 { 403 launch_desc.block_dim_x,
585 launch_desc.block_dim_x, 404 launch_desc.block_dim_y,
586 launch_desc.block_dim_y, 405 launch_desc.block_dim_z,
587 launch_desc.block_dim_z, 406 },
588 },
589 }); 407 });
590 408
591 // Compute dispatches can't be executed inside a renderpass 409 // Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
594 image_view_indices.clear(); 412 image_view_indices.clear();
595 sampler_handles.clear(); 413 sampler_handles.clear();
596 414
597 auto lock = texture_cache.AcquireLock(); 415 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
598 texture_cache.SynchronizeComputeDescriptors();
599 416
600 const auto& entries = pipeline.GetEntries(); 417 const auto& entries = pipeline.GetEntries();
418 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
419 buffer_cache.UnbindComputeStorageBuffers();
420 u32 ssbo_index = 0;
421 for (const auto& buffer : entries.global_buffers) {
422 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
423 buffer.is_written);
424 ++ssbo_index;
425 }
426 buffer_cache.UpdateComputeBuffers();
427
428 texture_cache.SynchronizeComputeDescriptors();
429
601 SetupComputeUniformTexels(entries); 430 SetupComputeUniformTexels(entries);
602 SetupComputeTextures(entries); 431 SetupComputeTextures(entries);
603 SetupComputeStorageTexels(entries); 432 SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
606 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 435 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
607 texture_cache.FillComputeImageViews(indices_span, image_view_ids); 436 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
608 437
609 buffer_cache.Map(CalculateComputeStreamBufferSize());
610
611 update_descriptor_queue.Acquire(); 438 update_descriptor_queue.Acquire();
612 439
613 SetupComputeConstBuffers(entries); 440 buffer_cache.BindHostComputeBuffers();
614 SetupComputeGlobalBuffers(entries);
615 441
616 ImageViewId* image_view_id_ptr = image_view_ids.data(); 442 ImageViewId* image_view_id_ptr = image_view_ids.data();
617 VkSampler* sampler_ptr = sampler_handles.data(); 443 VkSampler* sampler_ptr = sampler_handles.data();
618 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, 444 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
619 sampler_ptr); 445 sampler_ptr);
620 446
621 buffer_cache.Unmap();
622
623 const VkPipeline pipeline_handle = pipeline.GetHandle(); 447 const VkPipeline pipeline_handle = pipeline.GetHandle();
624 const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); 448 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
625 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); 449 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
644 query_cache.Query(gpu_addr, type, timestamp); 468 query_cache.Query(gpu_addr, type, timestamp);
645} 469}
646 470
471void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
472 u32 size) {
473 buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
474}
475
647void RasterizerVulkan::FlushAll() {} 476void RasterizerVulkan::FlushAll() {}
648 477
649void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { 478void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
651 return; 480 return;
652 } 481 }
653 { 482 {
654 auto lock = texture_cache.AcquireLock(); 483 std::scoped_lock lock{texture_cache.mutex};
655 texture_cache.DownloadMemory(addr, size); 484 texture_cache.DownloadMemory(addr, size);
656 } 485 }
657 buffer_cache.FlushRegion(addr, size); 486 {
487 std::scoped_lock lock{buffer_cache.mutex};
488 buffer_cache.DownloadMemory(addr, size);
489 }
658 query_cache.FlushRegion(addr, size); 490 query_cache.FlushRegion(addr, size);
659} 491}
660 492
661bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { 493bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
494 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
662 if (!Settings::IsGPULevelHigh()) { 495 if (!Settings::IsGPULevelHigh()) {
663 return buffer_cache.MustFlushRegion(addr, size); 496 return buffer_cache.IsRegionGpuModified(addr, size);
664 } 497 }
665 return texture_cache.IsRegionGpuModified(addr, size) || 498 return texture_cache.IsRegionGpuModified(addr, size) ||
666 buffer_cache.MustFlushRegion(addr, size); 499 buffer_cache.IsRegionGpuModified(addr, size);
667} 500}
668 501
669void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 502void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
671 return; 504 return;
672 } 505 }
673 { 506 {
674 auto lock = texture_cache.AcquireLock(); 507 std::scoped_lock lock{texture_cache.mutex};
675 texture_cache.WriteMemory(addr, size); 508 texture_cache.WriteMemory(addr, size);
676 } 509 }
510 {
511 std::scoped_lock lock{buffer_cache.mutex};
512 buffer_cache.WriteMemory(addr, size);
513 }
677 pipeline_cache.InvalidateRegion(addr, size); 514 pipeline_cache.InvalidateRegion(addr, size);
678 buffer_cache.InvalidateRegion(addr, size);
679 query_cache.InvalidateRegion(addr, size); 515 query_cache.InvalidateRegion(addr, size);
680} 516}
681 517
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
683 if (addr == 0 || size == 0) { 519 if (addr == 0 || size == 0) {
684 return; 520 return;
685 } 521 }
522 pipeline_cache.OnCPUWrite(addr, size);
686 { 523 {
687 auto lock = texture_cache.AcquireLock(); 524 std::scoped_lock lock{texture_cache.mutex};
688 texture_cache.WriteMemory(addr, size); 525 texture_cache.WriteMemory(addr, size);
689 } 526 }
690 pipeline_cache.OnCPUWrite(addr, size); 527 {
691 buffer_cache.OnCPUWrite(addr, size); 528 std::scoped_lock lock{buffer_cache.mutex};
529 buffer_cache.CachedWriteMemory(addr, size);
530 }
692} 531}
693 532
694void RasterizerVulkan::SyncGuestHost() { 533void RasterizerVulkan::SyncGuestHost() {
695 buffer_cache.SyncGuestHost();
696 pipeline_cache.SyncGuestHost(); 534 pipeline_cache.SyncGuestHost();
535 {
536 std::scoped_lock lock{buffer_cache.mutex};
537 buffer_cache.FlushCachedWrites();
538 }
697} 539}
698 540
699void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { 541void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
700 { 542 {
701 auto lock = texture_cache.AcquireLock(); 543 std::scoped_lock lock{texture_cache.mutex};
702 texture_cache.UnmapMemory(addr, size); 544 texture_cache.UnmapMemory(addr, size);
703 } 545 }
704 buffer_cache.OnCPUWrite(addr, size); 546 {
547 std::scoped_lock lock{buffer_cache.mutex};
548 buffer_cache.WriteMemory(addr, size);
549 }
705 pipeline_cache.OnCPUWrite(addr, size); 550 pipeline_cache.OnCPUWrite(addr, size);
706} 551}
707 552
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
774 draw_counter = 0; 619 draw_counter = 0;
775 update_descriptor_queue.TickFrame(); 620 update_descriptor_queue.TickFrame();
776 fence_manager.TickFrame(); 621 fence_manager.TickFrame();
777 buffer_cache.TickFrame();
778 staging_pool.TickFrame(); 622 staging_pool.TickFrame();
779 { 623 {
780 auto lock = texture_cache.AcquireLock(); 624 std::scoped_lock lock{texture_cache.mutex};
781 texture_cache.TickFrame(); 625 texture_cache.TickFrame();
782 } 626 }
627 {
628 std::scoped_lock lock{buffer_cache.mutex};
629 buffer_cache.TickFrame();
630 }
783} 631}
784 632
785bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 633bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
786 const Tegra::Engines::Fermi2D::Surface& dst, 634 const Tegra::Engines::Fermi2D::Surface& dst,
787 const Tegra::Engines::Fermi2D::Config& copy_config) { 635 const Tegra::Engines::Fermi2D::Config& copy_config) {
788 auto lock = texture_cache.AcquireLock(); 636 std::scoped_lock lock{texture_cache.mutex};
789 texture_cache.BlitImage(dst, src, copy_config); 637 texture_cache.BlitImage(dst, src, copy_config);
790 return true; 638 return true;
791} 639}
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
795 if (!framebuffer_addr) { 643 if (!framebuffer_addr) {
796 return false; 644 return false;
797 } 645 }
798 646 std::scoped_lock lock{texture_cache.mutex};
799 auto lock = texture_cache.AcquireLock();
800 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); 647 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
801 if (!image_view) { 648 if (!image_view) {
802 return false; 649 return false;
803 } 650 }
804
805 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 651 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
806 screen_info.width = image_view->size.width; 652 screen_info.width = image_view->size.width;
807 screen_info.height = image_view->size.height; 653 screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
830 draw_counter = 0; 676 draw_counter = 0;
831} 677}
832 678
833RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
834 BufferBindings& buffer_bindings,
835 bool is_indexed,
836 bool is_instanced) {
837 MICROPROFILE_SCOPE(Vulkan_Geometry);
838
839 const auto& regs = maxwell3d.regs;
840
841 SetupVertexArrays(buffer_bindings);
842
843 const u32 base_instance = regs.vb_base_instance;
844 const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
845 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
846 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
847
848 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
849 SetupIndexBuffer(buffer_bindings, params, is_indexed);
850
851 return params;
852}
853
854void RasterizerVulkan::SetupShaderDescriptors( 679void RasterizerVulkan::SetupShaderDescriptors(
855 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 680 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
856 image_view_indices.clear(); 681 image_view_indices.clear();
857 sampler_handles.clear(); 682 sampler_handles.clear();
858 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 683 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
860 if (!shader) { 685 if (!shader) {
861 continue; 686 continue;
862 } 687 }
863 const auto& entries = shader->GetEntries(); 688 const ShaderEntries& entries = shader->GetEntries();
864 SetupGraphicsUniformTexels(entries, stage); 689 SetupGraphicsUniformTexels(entries, stage);
865 SetupGraphicsTextures(entries, stage); 690 SetupGraphicsTextures(entries, stage);
866 SetupGraphicsStorageTexels(entries, stage); 691 SetupGraphicsStorageTexels(entries, stage);
867 SetupGraphicsImages(entries, stage); 692 SetupGraphicsImages(entries, stage);
693
694 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
695 buffer_cache.UnbindGraphicsStorageBuffers(stage);
696 u32 ssbo_index = 0;
697 for (const auto& buffer : entries.global_buffers) {
698 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
699 buffer.cbuf_offset, buffer.is_written);
700 ++ssbo_index;
701 }
868 } 702 }
869 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 703 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
704 buffer_cache.UpdateGraphicsBuffers(is_indexed);
870 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); 705 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
871 706
707 buffer_cache.BindHostGeometryBuffers(is_indexed);
708
872 update_descriptor_queue.Acquire(); 709 update_descriptor_queue.Acquire();
873 710
874 ImageViewId* image_view_id_ptr = image_view_ids.data(); 711 ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
879 if (!shader) { 716 if (!shader) {
880 continue; 717 continue;
881 } 718 }
882 const auto& entries = shader->GetEntries(); 719 buffer_cache.BindHostStageBuffers(stage);
883 SetupGraphicsConstBuffers(entries, stage); 720 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
884 SetupGraphicsGlobalBuffers(entries, stage); 721 image_view_id_ptr, sampler_ptr);
885 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
886 sampler_ptr);
887 } 722 }
888} 723}
889 724
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
916 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); 751 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
917 return; 752 return;
918 } 753 }
919
920 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 754 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
921 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 755 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
922 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 756 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
923 757 scheduler.Record(
924 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); 758 [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
925 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
926 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
927
928 const auto& binding = regs.tfb_bindings[0];
929 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
930 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
931
932 const GPUVAddr gpu_addr = binding.Address();
933 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
934 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
935
936 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
937 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
938 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
939 });
940} 759}
941 760
942void RasterizerVulkan::EndTransformFeedback() { 761void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
947 if (!device.IsExtTransformFeedbackSupported()) { 766 if (!device.IsExtTransformFeedbackSupported()) {
948 return; 767 return;
949 } 768 }
950
951 scheduler.Record( 769 scheduler.Record(
952 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 770 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
953} 771}
954 772
955void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
956 const auto& regs = maxwell3d.regs;
957
958 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
959 const auto& vertex_array = regs.vertex_array[index];
960 if (!vertex_array.IsEnabled()) {
961 continue;
962 }
963 const GPUVAddr start{vertex_array.StartAddress()};
964 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
965
966 ASSERT(end >= start);
967 const size_t size = end - start;
968 if (size == 0) {
969 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
970 continue;
971 }
972 const auto info = buffer_cache.UploadMemory(start, size);
973 buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
974 }
975}
976
977void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
978 bool is_indexed) {
979 if (params.num_vertices == 0) {
980 return;
981 }
982 const auto& regs = maxwell3d.regs;
983 switch (regs.draw.topology) {
984 case Maxwell::PrimitiveTopology::Quads: {
985 if (!params.is_indexed) {
986 const auto [buffer, offset] =
987 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
988 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
989 params.base_vertex = 0;
990 params.num_vertices = params.num_vertices * 6 / 4;
991 params.is_indexed = true;
992 break;
993 }
994 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
995 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
996 VkBuffer buffer = info.handle;
997 u64 offset = info.offset;
998 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
999 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
1000
1001 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
1002 params.num_vertices = (params.num_vertices / 4) * 6;
1003 params.base_vertex = 0;
1004 break;
1005 }
1006 default: {
1007 if (!is_indexed) {
1008 break;
1009 }
1010 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
1011 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1012 VkBuffer buffer = info.handle;
1013 u64 offset = info.offset;
1014
1015 auto format = regs.index_array.format;
1016 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
1017 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
1018 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
1019 format = Maxwell::IndexFormat::UnsignedShort;
1020 }
1021
1022 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
1023 break;
1024 }
1025 }
1026}
1027
1028void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1029 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1030 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1031 for (const auto& entry : entries.const_buffers) {
1032 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
1033 }
1034}
1035
1036void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1037 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1038 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1039
1040 for (const auto& entry : entries.global_buffers) {
1041 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
1042 SetupGlobalBuffer(entry, addr);
1043 }
1044}
1045
1046void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { 773void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1047 MICROPROFILE_SCOPE(Vulkan_Textures);
1048 const auto& regs = maxwell3d.regs; 774 const auto& regs = maxwell3d.regs;
1049 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 775 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1050 for (const auto& entry : entries.uniform_texels) { 776 for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
1054} 780}
1055 781
1056void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { 782void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1057 MICROPROFILE_SCOPE(Vulkan_Textures);
1058 const auto& regs = maxwell3d.regs; 783 const auto& regs = maxwell3d.regs;
1059 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 784 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1060 for (const auto& entry : entries.samplers) { 785 for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
1070} 795}
1071 796
1072void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { 797void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1073 MICROPROFILE_SCOPE(Vulkan_Textures);
1074 const auto& regs = maxwell3d.regs; 798 const auto& regs = maxwell3d.regs;
1075 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 799 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1076 for (const auto& entry : entries.storage_texels) { 800 for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
1080} 804}
1081 805
1082void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { 806void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1083 MICROPROFILE_SCOPE(Vulkan_Images);
1084 const auto& regs = maxwell3d.regs; 807 const auto& regs = maxwell3d.regs;
1085 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 808 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1086 for (const auto& entry : entries.images) { 809 for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
1089 } 812 }
1090} 813}
1091 814
1092void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1093 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1094 const auto& launch_desc = kepler_compute.launch_description;
1095 for (const auto& entry : entries.const_buffers) {
1096 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1097 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1098 const Tegra::Engines::ConstBufferInfo info{
1099 .address = config.Address(),
1100 .size = config.size,
1101 .enabled = mask[entry.GetIndex()],
1102 };
1103 SetupConstBuffer(entry, info);
1104 }
1105}
1106
1107void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1108 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1109 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1110 for (const auto& entry : entries.global_buffers) {
1111 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
1112 SetupGlobalBuffer(entry, addr);
1113 }
1114}
1115
1116void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 815void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1117 MICROPROFILE_SCOPE(Vulkan_Textures);
1118 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 816 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1119 for (const auto& entry : entries.uniform_texels) { 817 for (const auto& entry : entries.uniform_texels) {
1120 const TextureHandle handle = 818 const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1124} 822}
1125 823
1126void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 824void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1127 MICROPROFILE_SCOPE(Vulkan_Textures);
1128 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 825 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1129 for (const auto& entry : entries.samplers) { 826 for (const auto& entry : entries.samplers) {
1130 for (size_t index = 0; index < entry.size; ++index) { 827 for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1139} 836}
1140 837
1141void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 838void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1142 MICROPROFILE_SCOPE(Vulkan_Textures);
1143 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 839 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1144 for (const auto& entry : entries.storage_texels) { 840 for (const auto& entry : entries.storage_texels) {
1145 const TextureHandle handle = 841 const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1149} 845}
1150 846
1151void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 847void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1152 MICROPROFILE_SCOPE(Vulkan_Images);
1153 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 848 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1154 for (const auto& entry : entries.images) { 849 for (const auto& entry : entries.images) {
1155 const TextureHandle handle = 850 const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1158 } 853 }
1159} 854}
1160 855
1161void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1162 const Tegra::Engines::ConstBufferInfo& buffer) {
1163 if (!buffer.enabled) {
1164 // Set values to zero to unbind buffers
1165 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1166 return;
1167 }
1168 // Align the size to avoid bad std140 interactions
1169 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1170 ASSERT(size <= MaxConstbufferSize);
1171
1172 const u64 alignment = device.GetUniformBufferAlignment();
1173 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1174 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1175}
1176
1177void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
1178 const u64 actual_addr = gpu_memory.Read<u64>(address);
1179 const u32 size = gpu_memory.Read<u32>(address + 8);
1180
1181 if (size == 0) {
1182 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
1183 // because Vulkan doesn't like empty buffers.
1184 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1185 // default buffer.
1186 static constexpr size_t dummy_size = 4;
1187 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1188 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1189 return;
1190 }
1191
1192 const auto info = buffer_cache.UploadMemory(
1193 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1194 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1195}
1196
1197void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 856void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1198 if (!state_tracker.TouchViewports()) { 857 if (!state_tracker.TouchViewports()) {
1199 return; 858 return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
1206 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), 865 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
1207 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), 866 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
1208 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), 867 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
1209 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; 868 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
869 };
1210 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); 870 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
1211} 871}
1212 872
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
1214 if (!state_tracker.TouchScissors()) { 874 if (!state_tracker.TouchScissors()) {
1215 return; 875 return;
1216 } 876 }
1217 const std::array scissors = { 877 const std::array scissors{
1218 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), 878 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
1219 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), 879 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
1220 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), 880 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
1221 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), 881 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
1222 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), 882 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
1223 GetScissorState(regs, 15)}; 883 GetScissorState(regs, 15),
884 };
1224 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); 885 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
1225} 886}
1226 887
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1385 }); 1046 });
1386} 1047}
1387 1048
1388size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1389 size_t size = CalculateVertexArraysSize();
1390 if (is_indexed) {
1391 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1392 }
1393 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1394 return size;
1395}
1396
1397size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1398 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1399 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1400}
1401
1402size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1403 const auto& regs = maxwell3d.regs;
1404
1405 size_t size = 0;
1406 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1407 // This implementation assumes that all attributes are used in the shader.
1408 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1409 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1410 DEBUG_ASSERT(end >= start);
1411
1412 size += (end - start) * regs.vertex_array[index].enable;
1413 }
1414 return size;
1415}
1416
1417size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1418 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1419 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1420}
1421
1422size_t RasterizerVulkan::CalculateConstBufferSize(
1423 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1424 if (entry.IsIndirect()) {
1425 // Buffer is accessed indirectly, so upload the entire thing
1426 return buffer.size;
1427 } else {
1428 // Buffer is accessed directly, upload just what we use
1429 return entry.GetSize();
1430 }
1431}
1432
1433VkBuffer RasterizerVulkan::DefaultBuffer() {
1434 if (default_buffer) {
1435 return *default_buffer;
1436 }
1437 default_buffer = device.GetLogical().CreateBuffer({
1438 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1439 .pNext = nullptr,
1440 .flags = 0,
1441 .size = DEFAULT_BUFFER_SIZE,
1442 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1443 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
1444 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1445 .queueFamilyIndexCount = 0,
1446 .pQueueFamilyIndices = nullptr,
1447 });
1448 default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
1449
1450 scheduler.RequestOutsideRenderPassOperationContext();
1451 scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
1452 cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
1453 });
1454 return *default_buffer;
1455}
1456
1457} // namespace Vulkan 1049} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..acea1ba2d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,13 @@
18#include "video_core/renderer_vulkan/blit_image.h" 18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 21#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 22#include "video_core/renderer_vulkan/vk_fence_manager.h"
23#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
24#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
25#include "video_core/renderer_vulkan/vk_query_cache.h" 25#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 27#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
28#include "video_core/renderer_vulkan/vk_stream_buffer.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 28#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 29#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h" 30#include "video_core/shader/async_shaders.h"
@@ -49,7 +48,6 @@ namespace Vulkan {
49struct VKScreenInfo; 48struct VKScreenInfo;
50 49
51class StateTracker; 50class StateTracker;
52class BufferBindings;
53 51
54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 52class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
55public: 53public:
@@ -65,6 +63,7 @@ public:
65 void DispatchCompute(GPUVAddr code_addr) override; 63 void DispatchCompute(GPUVAddr code_addr) override;
66 void ResetCounter(VideoCore::QueryType type) override; 64 void ResetCounter(VideoCore::QueryType type) override;
67 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 65 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
66 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
68 void FlushAll() override; 67 void FlushAll() override;
69 void FlushRegion(VAddr addr, u64 size) override; 68 void FlushRegion(VAddr addr, u64 size) override;
70 bool MustFlushRegion(VAddr addr, u64 size) override; 69 bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +106,11 @@ private:
107 106
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); 107 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109 108
110 struct DrawParameters {
111 void Draw(vk::CommandBuffer cmdbuf) const;
112
113 u32 base_instance = 0;
114 u32 num_instances = 0;
115 u32 base_vertex = 0;
116 u32 num_vertices = 0;
117 bool is_indexed = 0;
118 };
119
120 void FlushWork(); 109 void FlushWork();
121 110
122 /// Setups geometry buffers and state.
123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
124 bool is_indexed, bool is_instanced);
125
126 /// Setup descriptors in the graphics pipeline. 111 /// Setup descriptors in the graphics pipeline.
127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 112 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
113 bool is_indexed);
128 114
129 void UpdateDynamicStates(); 115 void UpdateDynamicStates();
130 116
@@ -132,16 +118,6 @@ private:
132 118
133 void EndTransformFeedback(); 119 void EndTransformFeedback();
134 120
135 void SetupVertexArrays(BufferBindings& buffer_bindings);
136
137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
138
139 /// Setup constant buffers in the graphics pipeline.
140 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
141
142 /// Setup global buffers in the graphics pipeline.
143 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
144
145 /// Setup uniform texels in the graphics pipeline. 121 /// Setup uniform texels in the graphics pipeline.
146 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); 122 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
147 123
@@ -154,12 +130,6 @@ private:
154 /// Setup images in the graphics pipeline. 130 /// Setup images in the graphics pipeline.
155 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); 131 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
156 132
157 /// Setup constant buffers in the compute pipeline.
158 void SetupComputeConstBuffers(const ShaderEntries& entries);
159
160 /// Setup global buffers in the compute pipeline.
161 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
162
163 /// Setup texel buffers in the compute pipeline. 133 /// Setup texel buffers in the compute pipeline.
164 void SetupComputeUniformTexels(const ShaderEntries& entries); 134 void SetupComputeUniformTexels(const ShaderEntries& entries);
165 135
@@ -172,11 +142,6 @@ private:
172 /// Setup images in the compute pipeline. 142 /// Setup images in the compute pipeline.
173 void SetupComputeImages(const ShaderEntries& entries); 143 void SetupComputeImages(const ShaderEntries& entries);
174 144
175 void SetupConstBuffer(const ConstBufferEntry& entry,
176 const Tegra::Engines::ConstBufferInfo& buffer);
177
178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
179
180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 145 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 146 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 147 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +158,6 @@ private:
193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 158 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 159 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
195 160
196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
197
198 size_t CalculateComputeStreamBufferSize() const;
199
200 size_t CalculateVertexArraysSize() const;
201
202 size_t CalculateIndexBufferSize() const;
203
204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
206
207 VkBuffer DefaultBuffer();
208
209 Tegra::GPU& gpu; 161 Tegra::GPU& gpu;
210 Tegra::MemoryManager& gpu_memory; 162 Tegra::MemoryManager& gpu_memory;
211 Tegra::Engines::Maxwell3D& maxwell3d; 163 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +169,21 @@ private:
217 StateTracker& state_tracker; 169 StateTracker& state_tracker;
218 VKScheduler& scheduler; 170 VKScheduler& scheduler;
219 171
220 VKStreamBuffer stream_buffer;
221 StagingBufferPool staging_pool; 172 StagingBufferPool staging_pool;
222 VKDescriptorPool descriptor_pool; 173 VKDescriptorPool descriptor_pool;
223 VKUpdateDescriptorQueue update_descriptor_queue; 174 VKUpdateDescriptorQueue update_descriptor_queue;
224 BlitImageHelper blit_image; 175 BlitImageHelper blit_image;
225 QuadArrayPass quad_array_pass; 176
226 QuadIndexedPass quad_indexed_pass; 177 GraphicsPipelineCacheKey graphics_key;
227 Uint8Pass uint8_pass;
228 178
229 TextureCacheRuntime texture_cache_runtime; 179 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache; 180 TextureCache texture_cache;
181 BufferCacheRuntime buffer_cache_runtime;
182 BufferCache buffer_cache;
231 VKPipelineCache pipeline_cache; 183 VKPipelineCache pipeline_cache;
232 VKBufferCache buffer_cache;
233 VKQueryCache query_cache; 184 VKQueryCache query_cache;
234 VKFenceManager fence_manager; 185 VKFenceManager fence_manager;
235 186
236 vk::Buffer default_buffer;
237 MemoryCommit default_buffer_commit;
238 vk::Event wfi_event; 187 vk::Event wfi_event;
239 VideoCommon::Shader::AsyncShaders async_shaders; 188 VideoCommon::Shader::AsyncShaders async_shaders;
240 189
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index ee274ac59..a8bf7bda8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -17,21 +17,21 @@ ResourcePool::~ResourcePool() = default;
17size_t ResourcePool::CommitResource() { 17size_t ResourcePool::CommitResource() {
18 // Refresh semaphore to query updated results 18 // Refresh semaphore to query updated results
19 master_semaphore.Refresh(); 19 master_semaphore.Refresh();
20 20 const u64 gpu_tick = master_semaphore.KnownGpuTick();
21 const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> { 21 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
22 for (size_t iterator = begin; iterator < end; ++iterator) { 22 for (size_t iterator = begin; iterator < end; ++iterator) {
23 if (master_semaphore.IsFree(ticks[iterator])) { 23 if (gpu_tick >= ticks[iterator]) {
24 ticks[iterator] = master_semaphore.CurrentTick(); 24 ticks[iterator] = master_semaphore.CurrentTick();
25 return iterator; 25 return iterator;
26 } 26 }
27 } 27 }
28 return {}; 28 return std::nullopt;
29 }; 29 };
30 // Try to find a free resource from the hinted position to the end. 30 // Try to find a free resource from the hinted position to the end.
31 auto found = search(free_iterator, ticks.size()); 31 std::optional<size_t> found = search(hint_iterator, ticks.size());
32 if (!found) { 32 if (!found) {
33 // Search from beginning to the hinted position. 33 // Search from beginning to the hinted position.
34 found = search(0, free_iterator); 34 found = search(0, hint_iterator);
35 if (!found) { 35 if (!found) {
36 // Both searches failed, the pool is full; handle it. 36 // Both searches failed, the pool is full; handle it.
37 const size_t free_resource = ManageOverflow(); 37 const size_t free_resource = ManageOverflow();
@@ -41,7 +41,7 @@ size_t ResourcePool::CommitResource() {
41 } 41 }
42 } 42 }
43 // Free iterator is hinted to the resource after the one that's been commited. 43 // Free iterator is hinted to the resource after the one that's been commited.
44 free_iterator = (*found + 1) % ticks.size(); 44 hint_iterator = (*found + 1) % ticks.size();
45 return *found; 45 return *found;
46} 46}
47 47
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index a018c7ec2..9d0bb3b4d 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -36,7 +36,7 @@ private:
36 36
37 MasterSemaphore& master_semaphore; 37 MasterSemaphore& master_semaphore;
38 size_t grow_step = 0; ///< Number of new resources created after an overflow 38 size_t grow_step = 0; ///< Number of new resources created after an overflow
39 size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found 39 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
40 std::vector<u64> ticks; ///< Ticks for each resource 40 std::vector<u64> ticks; ///< Ticks for each resource
41}; 41};
42 42
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
52 worker_thread.join(); 52 worker_thread.join();
53} 53}
54 54
55u64 VKScheduler::CurrentTick() const noexcept {
56 return master_semaphore->CurrentTick();
57}
58
59bool VKScheduler::IsFree(u64 tick) const noexcept {
60 return master_semaphore->IsFree(tick);
61}
62
63void VKScheduler::Wait(u64 tick) {
64 master_semaphore->Wait(tick);
65}
66
67void VKScheduler::Flush(VkSemaphore semaphore) { 55void VKScheduler::Flush(VkSemaphore semaphore) {
68 SubmitExecution(semaphore); 56 SubmitExecution(semaphore);
69 AllocateNewContext(); 57 AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 257 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 258 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 259 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, 260 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images)); 261 vk::Span(barriers.data(), num_images));
274 }); 262 });
275 state.renderpass = nullptr; 263 state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/threadsafe_queue.h" 16#include "common/threadsafe_queue.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
21class CommandPool; 22class CommandPool;
22class Device; 23class Device;
23class Framebuffer; 24class Framebuffer;
24class MasterSemaphore;
25class StateTracker; 25class StateTracker;
26class VKQueryCache; 26class VKQueryCache;
27 27
@@ -32,15 +32,6 @@ public:
32 explicit VKScheduler(const Device& device, StateTracker& state_tracker); 32 explicit VKScheduler(const Device& device, StateTracker& state_tracker);
33 ~VKScheduler(); 33 ~VKScheduler();
34 34
35 /// Returns the current command buffer tick.
36 [[nodiscard]] u64 CurrentTick() const noexcept;
37
38 /// Returns true when a tick has been triggered by the GPU.
39 [[nodiscard]] bool IsFree(u64 tick) const noexcept;
40
41 /// Waits for the given tick to trigger on the GPU.
42 void Wait(u64 tick);
43
44 /// Sends the current execution context to the GPU. 35 /// Sends the current execution context to the GPU.
45 void Flush(VkSemaphore semaphore = nullptr); 36 void Flush(VkSemaphore semaphore = nullptr);
46 37
@@ -82,6 +73,21 @@ public:
82 (void)chunk->Record(command); 73 (void)chunk->Record(command);
83 } 74 }
84 75
76 /// Returns the current command buffer tick.
77 [[nodiscard]] u64 CurrentTick() const noexcept {
78 return master_semaphore->CurrentTick();
79 }
80
81 /// Returns true when a tick has been triggered by the GPU.
82 [[nodiscard]] bool IsFree(u64 tick) const noexcept {
83 return master_semaphore->IsFree(tick);
84 }
85
86 /// Waits for the given tick to trigger on the GPU.
87 void Wait(u64 tick) {
88 master_semaphore->Wait(tick);
89 }
90
85 /// Returns the master timeline semaphore. 91 /// Returns the master timeline semaphore.
86 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { 92 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
87 return *master_semaphore; 93 return *master_semaphore;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..40e2e0d38 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3106 entries.const_buffers.emplace_back(cbuf.second, cbuf.first); 3106 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
3107 } 3107 }
3108 for (const auto& [base, usage] : ir.GetGlobalMemory()) { 3108 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
3109 entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); 3109 entries.global_buffers.emplace_back(GlobalBufferEntry{
3110 .cbuf_index = base.cbuf_index,
3111 .cbuf_offset = base.cbuf_offset,
3112 .is_written = usage.is_written,
3113 });
3110 } 3114 }
3111 for (const auto& sampler : ir.GetSamplers()) { 3115 for (const auto& sampler : ir.GetSamplers()) {
3112 if (sampler.is_buffer) { 3116 if (sampler.is_buffer) {
@@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3127 entries.attributes.insert(GetGenericAttributeLocation(attribute)); 3131 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3128 } 3132 }
3129 } 3133 }
3134 for (const auto& buffer : entries.const_buffers) {
3135 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3136 }
3130 entries.clip_distances = ir.GetClipDistances(); 3137 entries.clip_distances = ir.GetClipDistances();
3131 entries.shader_length = ir.GetLength(); 3138 entries.shader_length = ir.GetLength();
3132 entries.uses_warps = ir.UsesWarps(); 3139 entries.uses_warps = ir.UsesWarps();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
39 u32 index{}; 39 u32 index{};
40}; 40};
41 41
42class GlobalBufferEntry { 42struct GlobalBufferEntry {
43public:
44 constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
45 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
46
47 constexpr u32 GetCbufIndex() const {
48 return cbuf_index;
49 }
50
51 constexpr u32 GetCbufOffset() const {
52 return cbuf_offset;
53 }
54
55 constexpr bool IsWritten() const {
56 return is_written;
57 }
58
59private:
60 u32 cbuf_index{}; 43 u32 cbuf_index{};
61 u32 cbuf_offset{}; 44 u32 cbuf_offset{};
62 bool is_written{}; 45 bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
78 std::set<u32> attributes; 61 std::set<u32> attributes;
79 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
80 std::size_t shader_length{}; 63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
81 bool uses_warps{}; 65 bool uses_warps{};
82}; 66};
83 67
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..7a1232497 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
8 8
9#include <fmt/format.h> 9#include <fmt/format.h>
10 10
11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
12#include "common/bit_util.h" 13#include "common/bit_util.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
@@ -17,18 +18,119 @@
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
21namespace {
22// Maximum potential alignment of a Vulkan buffer
23constexpr VkDeviceSize MAX_ALIGNMENT = 256;
24// Maximum size to put elements in the stream buffer
25constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
26// Stream buffer size in bytes
27constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
28constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
29
30constexpr VkMemoryPropertyFlags HOST_FLAGS =
31 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
32constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
33
34bool IsStreamHeap(VkMemoryHeap heap) noexcept {
35 return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
36}
37
38std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
39 VkMemoryPropertyFlags flags) noexcept {
40 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
41 if (((type_mask >> type_index) & 1) == 0) {
42 // Memory type is incompatible
43 continue;
44 }
45 const VkMemoryType& memory_type = props.memoryTypes[type_index];
46 if ((memory_type.propertyFlags & flags) != flags) {
47 // Memory type doesn't have the flags we want
48 continue;
49 }
50 if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
51 // Memory heap is not suitable for streaming
52 continue;
53 }
54 // Success!
55 return type_index;
56 }
57 return std::nullopt;
58}
59
60u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
61 // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
62 std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
63 if (type) {
64 return *type;
65 }
66 // Otherwise try without the DEVICE_LOCAL_BIT
67 type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
68 if (type) {
69 return *type;
70 }
71 // This should never happen, and in case it does, signal it as an out of memory situation
72 throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
73}
74
75size_t Region(size_t iterator) noexcept {
76 return iterator / REGION_SIZE;
77}
78} // Anonymous namespace
20 79
21StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, 80StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
22 VKScheduler& scheduler_) 81 VKScheduler& scheduler_)
23 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} 82 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
83 const vk::Device& dev = device.GetLogical();
84 stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
85 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
86 .pNext = nullptr,
87 .flags = 0,
88 .size = STREAM_BUFFER_SIZE,
89 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
90 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
91 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
92 .queueFamilyIndexCount = 0,
93 .pQueueFamilyIndices = nullptr,
94 });
95 if (device.HasDebuggingToolAttached()) {
96 stream_buffer.SetObjectNameEXT("Stream Buffer");
97 }
98 VkMemoryDedicatedRequirements dedicated_reqs{
99 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
100 .pNext = nullptr,
101 .prefersDedicatedAllocation = VK_FALSE,
102 .requiresDedicatedAllocation = VK_FALSE,
103 };
104 const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
105 const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
106 dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
107 const VkMemoryDedicatedAllocateInfo dedicated_info{
108 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
109 .pNext = nullptr,
110 .image = nullptr,
111 .buffer = *stream_buffer,
112 };
113 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
114 stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
115 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
116 .pNext = make_dedicated ? &dedicated_info : nullptr,
117 .allocationSize = requirements.size,
118 .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
119 });
120 if (device.HasDebuggingToolAttached()) {
121 stream_memory.SetObjectNameEXT("Stream Buffer Memory");
122 }
123 stream_buffer.BindMemory(*stream_memory, 0);
124 stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
125}
24 126
25StagingBufferPool::~StagingBufferPool() = default; 127StagingBufferPool::~StagingBufferPool() = default;
26 128
27StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { 129StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
28 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { 130 if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
29 return *ref; 131 return GetStreamBuffer(size);
30 } 132 }
31 return CreateStagingBuffer(size, usage); 133 return GetStagingBuffer(size, usage);
32} 134}
33 135
34void StagingBufferPool::TickFrame() { 136void StagingBufferPool::TickFrame() {
@@ -39,6 +141,52 @@ void StagingBufferPool::TickFrame() {
39 ReleaseCache(MemoryUsage::Download); 141 ReleaseCache(MemoryUsage::Download);
40} 142}
41 143
144StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
145 if (AreRegionsActive(Region(free_iterator) + 1,
146 std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
147 // Avoid waiting for the previous usages to be free
148 return GetStagingBuffer(size, MemoryUsage::Upload);
149 }
150 const u64 current_tick = scheduler.CurrentTick();
151 std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
152 current_tick);
153 used_iterator = iterator;
154 free_iterator = std::max(free_iterator, iterator + size);
155
156 if (iterator + size >= STREAM_BUFFER_SIZE) {
157 std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
158 current_tick);
159 used_iterator = 0;
160 iterator = 0;
161 free_iterator = size;
162
163 if (AreRegionsActive(0, Region(size) + 1)) {
164 // Avoid waiting for the previous usages to be free
165 return GetStagingBuffer(size, MemoryUsage::Upload);
166 }
167 }
168 const size_t offset = iterator;
169 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
170 return StagingBufferRef{
171 .buffer = *stream_buffer,
172 .offset = static_cast<VkDeviceSize>(offset),
173 .mapped_span = std::span<u8>(stream_pointer + offset, size),
174 };
175}
176
177bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
178 const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
179 return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
180 [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
181};
182
183StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
184 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
185 return *ref;
186 }
187 return CreateStagingBuffer(size, usage);
188}
189
42std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, 190std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
43 MemoryUsage usage) { 191 MemoryUsage usage) {
44 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; 192 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..69f7618de 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;
19 19
20struct StagingBufferRef { 20struct StagingBufferRef {
21 VkBuffer buffer; 21 VkBuffer buffer;
22 VkDeviceSize offset;
22 std::span<u8> mapped_span; 23 std::span<u8> mapped_span;
23}; 24};
24 25
25class StagingBufferPool { 26class StagingBufferPool {
26public: 27public:
28 static constexpr size_t NUM_SYNCS = 16;
29
27 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, 30 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
28 VKScheduler& scheduler); 31 VKScheduler& scheduler);
29 ~StagingBufferPool(); 32 ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
33 void TickFrame(); 36 void TickFrame();
34 37
35private: 38private:
39 struct StreamBufferCommit {
40 size_t upper_bound;
41 u64 tick;
42 };
43
36 struct StagingBuffer { 44 struct StagingBuffer {
37 vk::Buffer buffer; 45 vk::Buffer buffer;
38 MemoryCommit commit; 46 MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
42 StagingBufferRef Ref() const noexcept { 50 StagingBufferRef Ref() const noexcept {
43 return { 51 return {
44 .buffer = *buffer, 52 .buffer = *buffer,
53 .offset = 0,
45 .mapped_span = mapped_span, 54 .mapped_span = mapped_span,
46 }; 55 };
47 } 56 }
@@ -56,6 +65,12 @@ private:
56 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; 65 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
57 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; 66 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
58 67
68 StagingBufferRef GetStreamBuffer(size_t size);
69
70 bool AreRegionsActive(size_t region_begin, size_t region_end) const;
71
72 StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
73
59 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); 74 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
60 75
61 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); 76 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +85,15 @@ private:
70 MemoryAllocator& memory_allocator; 85 MemoryAllocator& memory_allocator;
71 VKScheduler& scheduler; 86 VKScheduler& scheduler;
72 87
88 vk::Buffer stream_buffer;
89 vk::DeviceMemory stream_memory;
90 u8* stream_pointer = nullptr;
91
92 size_t iterator = 0;
93 size_t used_iterator = 0;
94 size_t free_iterator = 0;
95 std::array<u64, NUM_SYNCS> sync_ticks{};
96
73 StagingBuffersCache device_local_cache; 97 StagingBuffersCache device_local_cache;
74 StagingBuffersCache upload_cache; 98 StagingBuffersCache upload_cache;
75 StagingBuffersCache download_cache; 99 StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..956f86845 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -18,9 +18,7 @@
18#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) 18#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
19 19
20namespace Vulkan { 20namespace Vulkan {
21
22namespace { 21namespace {
23
24using namespace Dirty; 22using namespace Dirty;
25using namespace VideoCommon::Dirty; 23using namespace VideoCommon::Dirty;
26using Tegra::Engines::Maxwell3D; 24using Tegra::Engines::Maxwell3D;
@@ -30,15 +28,18 @@ using Table = Maxwell3D::DirtyState::Table;
30using Flags = Maxwell3D::DirtyState::Flags; 28using Flags = Maxwell3D::DirtyState::Flags;
31 29
32Flags MakeInvalidationFlags() { 30Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{ 31 static constexpr int INVALIDATION_FLAGS[]{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, 32 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, 33 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, 34 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
37 }; 35 };
38 Flags flags{}; 36 Flags flags{};
39 for (const int flag : INVALIDATION_FLAGS) { 37 for (const int flag : INVALIDATION_FLAGS) {
40 flags[flag] = true; 38 flags[flag] = true;
41 } 39 }
40 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
41 flags[index] = true;
42 }
42 return flags; 43 return flags;
43} 44}
44 45
@@ -125,12 +126,40 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
125 tables[0][OFF(stencil_enable)] = StencilTestEnable; 126 tables[0][OFF(stencil_enable)] = StencilTestEnable;
126} 127}
127 128
129void SetupDirtyBlending(Tables& tables) {
130 tables[0][OFF(color_mask_common)] = Blending;
131 tables[0][OFF(independent_blend_enable)] = Blending;
132 FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending);
133 FillBlock(tables[0], OFF(blend), NUM(blend), Blending);
134 FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
135}
136
137void SetupDirtyInstanceDivisors(Tables& tables) {
138 static constexpr size_t divisor_offset = 3;
139 for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
140 tables[0][OFF(instanced_arrays) + index] = InstanceDivisors;
141 tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] =
142 InstanceDivisors;
143 }
144}
145
146void SetupDirtyVertexAttributes(Tables& tables) {
147 FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes);
148}
149
150void SetupDirtyViewportSwizzles(Tables& tables) {
151 static constexpr size_t swizzle_offset = 6;
152 for (size_t index = 0; index < Regs::NumViewports; ++index) {
153 tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
154 ViewportSwizzles;
155 }
156}
128} // Anonymous namespace 157} // Anonymous namespace
129 158
130StateTracker::StateTracker(Tegra::GPU& gpu) 159StateTracker::StateTracker(Tegra::GPU& gpu)
131 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 160 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
132 auto& tables = gpu.Maxwell3D().dirty.tables; 161 auto& tables = gpu.Maxwell3D().dirty.tables;
133 SetupDirtyRenderTargets(tables); 162 SetupDirtyFlags(tables);
134 SetupDirtyViewports(tables); 163 SetupDirtyViewports(tables);
135 SetupDirtyScissors(tables); 164 SetupDirtyScissors(tables);
136 SetupDirtyDepthBias(tables); 165 SetupDirtyDepthBias(tables);
@@ -145,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
145 SetupDirtyFrontFace(tables); 174 SetupDirtyFrontFace(tables);
146 SetupDirtyStencilOp(tables); 175 SetupDirtyStencilOp(tables);
147 SetupDirtyStencilTestEnable(tables); 176 SetupDirtyStencilTestEnable(tables);
177 SetupDirtyBlending(tables);
178 SetupDirtyInstanceDivisors(tables);
179 SetupDirtyVertexAttributes(tables);
180 SetupDirtyViewportSwizzles(tables);
148} 181}
149 182
150} // namespace Vulkan 183} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index c335d2bdf..84e918a71 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -35,6 +35,11 @@ enum : u8 {
35 StencilOp, 35 StencilOp,
36 StencilTestEnable, 36 StencilTestEnable,
37 37
38 Blending,
39 InstanceDivisors,
40 VertexAttributes,
41 ViewportSwizzles,
42
38 Last 43 Last
39}; 44};
40static_assert(Last <= std::numeric_limits<u8>::max()); 45static_assert(Last <= std::numeric_limits<u8>::max());
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 725a2a05d..0b63bd6c8 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
56 56
57} // Anonymous namespace 57} // Anonymous namespace
58 58
59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) 59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
60 : surface{surface_}, device{device_}, scheduler{scheduler_} {} 60 u32 width, u32 height, bool srgb)
61 : surface{surface_}, device{device_}, scheduler{scheduler_} {
62 Create(width, height, srgb);
63}
61 64
62VKSwapchain::~VKSwapchain() = default; 65VKSwapchain::~VKSwapchain() = default;
63 66
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 2eadd62b3..a728511e0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -20,7 +20,8 @@ class VKScheduler;
20 20
21class VKSwapchain { 21class VKSwapchain {
22public: 22public:
23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); 23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
24 u32 width, u32 height, bool srgb);
24 ~VKSwapchain(); 25 ~VKSwapchain();
25 26
26 /// Creates (or recreates) the swapchain with a given size. 27 /// Creates (or recreates) the swapchain with a given size.
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, 426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
427 VkImageAspectFlags aspect_mask, bool is_initialized, 427 VkImageAspectFlags aspect_mask, bool is_initialized,
428 std::span<const VkBufferImageCopy> copies) { 428 std::span<const VkBufferImageCopy> copies) {
429 static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | 429 static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
430 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 430 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
432 static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
433 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
434 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
432 const VkImageMemoryBarrier read_barrier{ 435 const VkImageMemoryBarrier read_barrier{
433 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 436 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
434 .pNext = nullptr, 437 .pNext = nullptr,
435 .srcAccessMask = ACCESS_FLAGS, 438 .srcAccessMask = WRITE_ACCESS_FLAGS,
436 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 439 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
437 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, 440 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
438 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 441 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
439 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 442 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
440 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 443 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
441 .image = image, 444 .image = image,
442 .subresourceRange = 445 .subresourceRange{
443 { 446 .aspectMask = aspect_mask,
444 .aspectMask = aspect_mask, 447 .baseMipLevel = 0,
445 .baseMipLevel = 0, 448 .levelCount = VK_REMAINING_MIP_LEVELS,
446 .levelCount = VK_REMAINING_MIP_LEVELS, 449 .baseArrayLayer = 0,
447 .baseArrayLayer = 0, 450 .layerCount = VK_REMAINING_ARRAY_LAYERS,
448 .layerCount = VK_REMAINING_ARRAY_LAYERS, 451 },
449 },
450 }; 452 };
451 const VkImageMemoryBarrier write_barrier{ 453 const VkImageMemoryBarrier write_barrier{
452 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 454 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
453 .pNext = nullptr, 455 .pNext = nullptr,
454 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 456 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
455 .dstAccessMask = ACCESS_FLAGS, 457 .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
456 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 458 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
457 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 459 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
458 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 460 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
459 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 461 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
460 .image = image, 462 .image = image,
461 .subresourceRange = 463 .subresourceRange{
462 { 464 .aspectMask = aspect_mask,
463 .aspectMask = aspect_mask, 465 .baseMipLevel = 0,
464 .baseMipLevel = 0, 466 .levelCount = VK_REMAINING_MIP_LEVELS,
465 .levelCount = VK_REMAINING_MIP_LEVELS, 467 .baseArrayLayer = 0,
466 .baseArrayLayer = 0, 468 .layerCount = VK_REMAINING_ARRAY_LAYERS,
467 .layerCount = VK_REMAINING_ARRAY_LAYERS, 469 },
468 },
469 }; 470 };
470 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 471 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
471 read_barrier); 472 read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
569 scheduler.Finish(); 570 scheduler.Finish();
570} 571}
571 572
572ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { 573StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
573 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); 574 return staging_buffer_pool.Request(size, MemoryUsage::Upload);
574 return {
575 .handle = staging_ref.buffer,
576 .span = staging_ref.mapped_span,
577 };
578} 575}
579 576
580ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { 577StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
581 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); 578 return staging_buffer_pool.Request(size, MemoryUsage::Download);
582 return {
583 .handle = staging_ref.buffer,
584 .span = staging_ref.mapped_span,
585 };
586} 579}
587 580
588void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 581void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
754 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 747 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
755 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 748 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
756 VK_ACCESS_TRANSFER_WRITE_BIT, 749 VK_ACCESS_TRANSFER_WRITE_BIT,
757 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 750 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
758 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 751 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
759 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 752 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
760 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 753 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
765 VkImageMemoryBarrier{ 758 VkImageMemoryBarrier{
766 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 759 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
767 .pNext = nullptr, 760 .pNext = nullptr,
768 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | 761 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
769 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
770 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
771 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
772 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 762 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
773 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, 763 VK_ACCESS_TRANSFER_WRITE_BIT,
774 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 764 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
775 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 765 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
776 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 766 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
828 } 818 }
829} 819}
830 820
831void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 821void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
832 std::span<const BufferImageCopy> copies) {
833 // TODO: Move this to another API 822 // TODO: Move this to another API
834 scheduler->RequestOutsideRenderPassOperationContext(); 823 scheduler->RequestOutsideRenderPassOperationContext();
835 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 824 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
836 const VkBuffer src_buffer = map.handle; 825 const VkBuffer src_buffer = map.buffer;
837 const VkImage vk_image = *image; 826 const VkImage vk_image = *image;
838 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 827 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
839 const bool is_initialized = std::exchange(initialized, true); 828 const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
843 }); 832 });
844} 833}
845 834
846void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 835void Image::UploadMemory(const StagingBufferRef& map,
847 std::span<const VideoCommon::BufferCopy> copies) { 836 std::span<const VideoCommon::BufferCopy> copies) {
848 // TODO: Move this to another API 837 // TODO: Move this to another API
849 scheduler->RequestOutsideRenderPassOperationContext(); 838 scheduler->RequestOutsideRenderPassOperationContext();
850 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); 839 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
851 const VkBuffer src_buffer = map.handle; 840 const VkBuffer src_buffer = map.buffer;
852 const VkBuffer dst_buffer = *buffer; 841 const VkBuffer dst_buffer = *buffer;
853 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { 842 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
854 // TODO: Barriers 843 // TODO: Barriers
@@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
856 }); 845 });
857} 846}
858 847
859void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 848void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
860 std::span<const BufferImageCopy> copies) { 849 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
861 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 850 scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
862 scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
863 vk_copies](vk::CommandBuffer cmdbuf) { 851 vk_copies](vk::CommandBuffer cmdbuf) {
864 // TODO: Barriers 852 const VkImageMemoryBarrier read_barrier{
865 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); 853 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
854 .pNext = nullptr,
855 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
856 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
857 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
858 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
859 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
860 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
861 .image = image,
862 .subresourceRange{
863 .aspectMask = aspect_mask,
864 .baseMipLevel = 0,
865 .levelCount = VK_REMAINING_MIP_LEVELS,
866 .baseArrayLayer = 0,
867 .layerCount = VK_REMAINING_ARRAY_LAYERS,
868 },
869 };
870 const VkImageMemoryBarrier image_write_barrier{
871 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
872 .pNext = nullptr,
873 .srcAccessMask = 0,
874 .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
875 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
876 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
877 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
878 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
879 .image = image,
880 .subresourceRange{
881 .aspectMask = aspect_mask,
882 .baseMipLevel = 0,
883 .levelCount = VK_REMAINING_MIP_LEVELS,
884 .baseArrayLayer = 0,
885 .layerCount = VK_REMAINING_ARRAY_LAYERS,
886 },
887 };
888 const VkMemoryBarrier memory_write_barrier{
889 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
890 .pNext = nullptr,
891 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
892 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
893 };
894 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
895 0, read_barrier);
896 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
897 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
898 0, memory_write_barrier, nullptr, image_write_barrier);
866 }); 899 });
867} 900}
868 901
@@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1127 .pAttachments = attachments.data(), 1160 .pAttachments = attachments.data(),
1128 .width = key.size.width, 1161 .width = key.size.width,
1129 .height = key.size.height, 1162 .height = key.size.height,
1130 .layers = static_cast<u32>(num_layers), 1163 .layers = static_cast<u32>(std::max(num_layers, 1)),
1131 }); 1164 });
1132 if (runtime.device.HasDebuggingToolAttached()) { 1165 if (runtime.device.HasDebuggingToolAttached()) {
1133 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); 1166 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
7#include <compare> 7#include <compare>
8#include <span> 8#include <span>
9 9
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
10#include "video_core/texture_cache/texture_cache.h" 11#include "video_core/texture_cache/texture_cache.h"
11#include "video_core/vulkan_common/vulkan_memory_allocator.h" 12#include "video_core/vulkan_common/vulkan_memory_allocator.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
53 54
54namespace Vulkan { 55namespace Vulkan {
55 56
56struct ImageBufferMap {
57 [[nodiscard]] VkBuffer Handle() const noexcept {
58 return handle;
59 }
60
61 [[nodiscard]] std::span<u8> Span() const noexcept {
62 return span;
63 }
64
65 VkBuffer handle;
66 std::span<u8> span;
67};
68
69struct TextureCacheRuntime { 57struct TextureCacheRuntime {
70 const Device& device; 58 const Device& device;
71 VKScheduler& scheduler; 59 VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
76 64
77 void Finish(); 65 void Finish();
78 66
79 [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); 67 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
80 68
81 [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); 69 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
82 70
83 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 71 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
84 const std::array<Offset2D, 2>& dst_region, 72 const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
94 return false; 82 return false;
95 } 83 }
96 84
97 void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, 85 void AccelerateImageUpload(Image&, const StagingBufferRef&,
98 std::span<const VideoCommon::SwizzleParameters>) { 86 std::span<const VideoCommon::SwizzleParameters>) {
99 UNREACHABLE(); 87 UNREACHABLE();
100 } 88 }
@@ -112,13 +100,12 @@ public:
112 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 100 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
113 VAddr cpu_addr); 101 VAddr cpu_addr);
114 102
115 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 103 void UploadMemory(const StagingBufferRef& map,
116 std::span<const VideoCommon::BufferImageCopy> copies); 104 std::span<const VideoCommon::BufferImageCopy> copies);
117 105
118 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 106 void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
119 std::span<const VideoCommon::BufferCopy> copies);
120 107
121 void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 108 void DownloadMemory(const StagingBufferRef& map,
122 std::span<const VideoCommon::BufferImageCopy> copies); 109 std::span<const VideoCommon::BufferImageCopy> copies);
123 110
124 [[nodiscard]] VkImage Handle() const noexcept { 111 [[nodiscard]] VkImage Handle() const noexcept {
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 3b40db9bc..02adcf9c7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -64,6 +64,7 @@ void AsyncShaders::FreeWorkers() {
64 64
65void AsyncShaders::KillWorkers() { 65void AsyncShaders::KillWorkers() {
66 is_thread_exiting.store(true); 66 is_thread_exiting.store(true);
67 cv.notify_all();
67 for (auto& thread : worker_threads) { 68 for (auto& thread : worker_threads) {
68 thread.detach(); 69 thread.detach();
69 } 70 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 0dbb1a31f..7fdff6e56 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -9,16 +9,7 @@
9#include <shared_mutex> 9#include <shared_mutex>
10#include <thread> 10#include <thread>
11 11
12// This header includes both Vulkan and OpenGL headers, this has to be fixed
13// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
14// Forcefully include glad early and undefine macros
15#include <glad/glad.h> 12#include <glad/glad.h>
16#ifdef CreateEvent
17#undef CreateEvent
18#endif
19#ifdef CreateSemaphore
20#undef CreateSemaphore
21#endif
22 13
23#include "common/common_types.h" 14#include "common/common_types.h"
24#include "video_core/renderer_opengl/gl_device.h" 15#include "video_core/renderer_opengl/gl_device.h"
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d3ea07aac..5f88537bc 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
76 case SystemVariable::InvocationId: 76 case SystemVariable::InvocationId:
77 return Operation(OperationCode::InvocationId); 77 return Operation(OperationCode::InvocationId);
78 case SystemVariable::Ydirection: 78 case SystemVariable::Ydirection:
79 uses_y_negate = true;
79 return Operation(OperationCode::YNegate); 80 return Operation(OperationCode::YNegate);
80 case SystemVariable::InvocationInfo: 81 case SystemVariable::InvocationInfo:
81 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); 82 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0c6ab0f07..1cd7c14d7 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -139,6 +139,10 @@ public:
139 return uses_legacy_varyings; 139 return uses_legacy_varyings;
140 } 140 }
141 141
142 bool UsesYNegate() const {
143 return uses_y_negate;
144 }
145
142 bool UsesWarps() const { 146 bool UsesWarps() const {
143 return uses_warps; 147 return uses_warps;
144 } 148 }
@@ -465,6 +469,7 @@ private:
465 bool uses_instance_id{}; 469 bool uses_instance_id{};
466 bool uses_vertex_id{}; 470 bool uses_vertex_id{};
467 bool uses_legacy_varyings{}; 471 bool uses_legacy_varyings{};
472 bool uses_y_negate{};
468 bool uses_warps{}; 473 bool uses_warps{};
469 bool uses_indexed_samplers{}; 474 bool uses_indexed_samplers{};
470 475
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d1080300f..b1da69971 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -103,9 +103,6 @@ public:
103 /// Notify the cache that a new frame has been queued 103 /// Notify the cache that a new frame has been queued
104 void TickFrame(); 104 void TickFrame();
105 105
106 /// Return an unique mutually exclusive lock for the cache
107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
108
109 /// Return a constant reference to the given image view id 106 /// Return a constant reference to the given image view id
110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; 107 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
111 108
@@ -179,6 +176,8 @@ public:
179 /// Return true when a CPU region is modified from the GPU 176 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 177 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
181 178
179 std::mutex mutex;
180
182private: 181private:
183 /// Iterate over all page indices in a range 182 /// Iterate over all page indices in a range
184 template <typename Func> 183 template <typename Func>
@@ -212,8 +211,8 @@ private:
212 void RefreshContents(Image& image); 211 void RefreshContents(Image& image);
213 212
214 /// Upload data from guest to an image 213 /// Upload data from guest to an image
215 template <typename MapBuffer> 214 template <typename StagingBuffer>
216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); 215 void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
217 216
218 /// Find or create an image view from a guest descriptor 217 /// Find or create an image view from a guest descriptor
219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); 218 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -325,8 +324,6 @@ private:
325 324
326 RenderTargets render_targets; 325 RenderTargets render_targets;
327 326
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views; 327 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers; 328 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 329 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
@@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() {
386} 383}
387 384
388template <class P> 385template <class P>
389std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
390 return std::unique_lock{mutex};
391}
392
393template <class P>
394const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { 386const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
395 return slot_image_views[id]; 387 return slot_image_views[id];
396} 388}
@@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
598 }); 590 });
599 for (const ImageId image_id : images) { 591 for (const ImageId image_id : images) {
600 Image& image = slot_images[image_id]; 592 Image& image = slot_images[image_id];
601 auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); 593 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
602 const auto copies = FullDownloadCopies(image.info); 594 const auto copies = FullDownloadCopies(image.info);
603 image.DownloadMemory(map, 0, copies); 595 image.DownloadMemory(map, copies);
604 runtime.Finish(); 596 runtime.Finish();
605 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); 597 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
606 } 598 }
607} 599}
608 600
@@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() {
757 for (const ImageId image_id : download_ids) { 749 for (const ImageId image_id : download_ids) {
758 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 750 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
759 } 751 }
760 auto download_map = runtime.MapDownloadBuffer(total_size_bytes); 752 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
761 size_t buffer_offset = 0; 753 const size_t original_offset = download_map.offset;
762 for (const ImageId image_id : download_ids) { 754 for (const ImageId image_id : download_ids) {
763 Image& image = slot_images[image_id]; 755 Image& image = slot_images[image_id];
764 const auto copies = FullDownloadCopies(image.info); 756 const auto copies = FullDownloadCopies(image.info);
765 image.DownloadMemory(download_map, buffer_offset, copies); 757 image.DownloadMemory(download_map, copies);
766 buffer_offset += image.unswizzled_size_bytes; 758 download_map.offset += image.unswizzled_size_bytes;
767 } 759 }
768 // Wait for downloads to finish 760 // Wait for downloads to finish
769 runtime.Finish(); 761 runtime.Finish();
770 762
771 buffer_offset = 0; 763 download_map.offset = original_offset;
772 const std::span<u8> download_span = download_map.Span(); 764 std::span<u8> download_span = download_map.mapped_span;
773 for (const ImageId image_id : download_ids) { 765 for (const ImageId image_id : download_ids) {
774 const ImageBase& image = slot_images[image_id]; 766 const ImageBase& image = slot_images[image_id];
775 const auto copies = FullDownloadCopies(image.info); 767 const auto copies = FullDownloadCopies(image.info);
776 const std::span<u8> image_download_span = download_span.subspan(buffer_offset); 768 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
777 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); 769 download_map.offset += image.unswizzled_size_bytes;
778 buffer_offset += image.unswizzled_size_bytes; 770 download_span = download_span.subspan(image.unswizzled_size_bytes);
779 } 771 }
780 committed_downloads.pop(); 772 committed_downloads.pop();
781} 773}
@@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) {
806 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 798 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
807 return; 799 return;
808 } 800 }
809 auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); 801 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
810 UploadImageContents(image, map, 0); 802 UploadImageContents(image, staging);
811 runtime.InsertUploadMemoryBarrier(); 803 runtime.InsertUploadMemoryBarrier();
812} 804}
813 805
814template <class P> 806template <class P>
815template <typename MapBuffer> 807template <typename StagingBuffer>
816void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { 808void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
817 const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); 809 const std::span<u8> mapped_span = staging.mapped_span;
818 const GPUVAddr gpu_addr = image.gpu_addr; 810 const GPUVAddr gpu_addr = image.gpu_addr;
819 811
820 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { 812 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
821 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); 813 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
822 const auto uploads = FullUploadSwizzles(image.info); 814 const auto uploads = FullUploadSwizzles(image.info);
823 runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); 815 runtime.AccelerateImageUpload(image, staging, uploads);
824 } else if (True(image.flags & ImageFlagBits::Converted)) { 816 } else if (True(image.flags & ImageFlagBits::Converted)) {
825 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); 817 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
826 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 818 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
827 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 819 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
828 image.UploadMemory(map, buffer_offset, copies); 820 image.UploadMemory(staging, copies);
829 } else if (image.info.type == ImageType::Buffer) { 821 } else if (image.info.type == ImageType::Buffer) {
830 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; 822 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
831 image.UploadMemory(map, buffer_offset, copies); 823 image.UploadMemory(staging, copies);
832 } else { 824 } else {
833 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 825 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
834 image.UploadMemory(map, buffer_offset, copies); 826 image.UploadMemory(staging, copies);
835 } 827 }
836} 828}
837 829
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 53444e945..e1b38c6ac 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -38,19 +38,18 @@ namespace VideoCore {
38 38
39std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { 39std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
40 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); 40 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
41 std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( 41 const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
42 system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); 42 auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
43
44 auto context = emu_window.CreateSharedContext(); 43 auto context = emu_window.CreateSharedContext();
45 const auto scope = context->Acquire(); 44 auto scope = context->Acquire();
46 45 try {
47 auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); 46 auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
48 if (!renderer->Init()) { 47 gpu->BindRenderer(std::move(renderer));
48 return gpu;
49 } catch (const std::runtime_error& exception) {
50 LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
49 return nullptr; 51 return nullptr;
50 } 52 }
51
52 gpu->BindRenderer(std::move(renderer));
53 return gpu;
54} 53}
55 54
56u16 GetResolutionScaleFactor(const RendererBase& renderer) { 55u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 51f53bc39..34d396434 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -18,27 +18,22 @@
18#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
19 19
20namespace Vulkan { 20namespace Vulkan {
21
22namespace { 21namespace {
23
24namespace Alternatives { 22namespace Alternatives {
25 23constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
26constexpr std::array Depth24UnormS8_UINT{
27 VK_FORMAT_D32_SFLOAT_S8_UINT, 24 VK_FORMAT_D32_SFLOAT_S8_UINT,
28 VK_FORMAT_D16_UNORM_S8_UINT, 25 VK_FORMAT_D16_UNORM_S8_UINT,
29 VkFormat{}, 26 VK_FORMAT_UNDEFINED,
30}; 27};
31 28
32constexpr std::array Depth16UnormS8_UINT{ 29constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
33 VK_FORMAT_D24_UNORM_S8_UINT, 30 VK_FORMAT_D24_UNORM_S8_UINT,
34 VK_FORMAT_D32_SFLOAT_S8_UINT, 31 VK_FORMAT_D32_SFLOAT_S8_UINT,
35 VkFormat{}, 32 VK_FORMAT_UNDEFINED,
36}; 33};
37
38} // namespace Alternatives 34} // namespace Alternatives
39 35
40constexpr std::array REQUIRED_EXTENSIONS{ 36constexpr std::array REQUIRED_EXTENSIONS{
41 VK_KHR_SWAPCHAIN_EXTENSION_NAME,
42 VK_KHR_MAINTENANCE1_EXTENSION_NAME, 37 VK_KHR_MAINTENANCE1_EXTENSION_NAME,
43 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, 38 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
44 VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, 39 VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
@@ -51,7 +46,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
51 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 46 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
52 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 47 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
53 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 48 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
49 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
54 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, 50 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
51#ifdef _WIN32
52 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
53#endif
54#ifdef __linux__
55 VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
56#endif
55}; 57};
56 58
57template <typename T> 59template <typename T>
@@ -63,9 +65,9 @@ void SetNext(void**& next, T& data) {
63constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { 65constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
64 switch (format) { 66 switch (format) {
65 case VK_FORMAT_D24_UNORM_S8_UINT: 67 case VK_FORMAT_D24_UNORM_S8_UINT:
66 return Alternatives::Depth24UnormS8_UINT.data(); 68 return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
67 case VK_FORMAT_D16_UNORM_S8_UINT: 69 case VK_FORMAT_D16_UNORM_S8_UINT:
68 return Alternatives::Depth16UnormS8_UINT.data(); 70 return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
69 default: 71 default:
70 return nullptr; 72 return nullptr;
71 } 73 }
@@ -195,78 +197,77 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
195 const vk::InstanceDispatch& dld_) 197 const vk::InstanceDispatch& dld_)
196 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 198 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
197 format_properties{GetFormatProperties(physical)} { 199 format_properties{GetFormatProperties(physical)} {
198 CheckSuitability(); 200 CheckSuitability(surface != nullptr);
199 SetupFamilies(surface); 201 SetupFamilies(surface);
200 SetupFeatures(); 202 SetupFeatures();
201 203
202 const auto queue_cis = GetDeviceQueueCreateInfos(); 204 const auto queue_cis = GetDeviceQueueCreateInfos();
203 const std::vector extensions = LoadExtensions(); 205 const std::vector extensions = LoadExtensions(surface != nullptr);
204 206
205 VkPhysicalDeviceFeatures2 features2{ 207 VkPhysicalDeviceFeatures2 features2{
206 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, 208 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
207 .pNext = nullptr, 209 .pNext = nullptr,
208 .features{}, 210 .features{
211 .robustBufferAccess = true,
212 .fullDrawIndexUint32 = false,
213 .imageCubeArray = true,
214 .independentBlend = true,
215 .geometryShader = true,
216 .tessellationShader = true,
217 .sampleRateShading = false,
218 .dualSrcBlend = false,
219 .logicOp = false,
220 .multiDrawIndirect = false,
221 .drawIndirectFirstInstance = false,
222 .depthClamp = true,
223 .depthBiasClamp = true,
224 .fillModeNonSolid = false,
225 .depthBounds = false,
226 .wideLines = false,
227 .largePoints = true,
228 .alphaToOne = false,
229 .multiViewport = true,
230 .samplerAnisotropy = true,
231 .textureCompressionETC2 = false,
232 .textureCompressionASTC_LDR = is_optimal_astc_supported,
233 .textureCompressionBC = false,
234 .occlusionQueryPrecise = true,
235 .pipelineStatisticsQuery = false,
236 .vertexPipelineStoresAndAtomics = true,
237 .fragmentStoresAndAtomics = true,
238 .shaderTessellationAndGeometryPointSize = false,
239 .shaderImageGatherExtended = true,
240 .shaderStorageImageExtendedFormats = false,
241 .shaderStorageImageMultisample = is_shader_storage_image_multisample,
242 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
243 .shaderStorageImageWriteWithoutFormat = true,
244 .shaderUniformBufferArrayDynamicIndexing = false,
245 .shaderSampledImageArrayDynamicIndexing = false,
246 .shaderStorageBufferArrayDynamicIndexing = false,
247 .shaderStorageImageArrayDynamicIndexing = false,
248 .shaderClipDistance = false,
249 .shaderCullDistance = false,
250 .shaderFloat64 = false,
251 .shaderInt64 = false,
252 .shaderInt16 = false,
253 .shaderResourceResidency = false,
254 .shaderResourceMinLod = false,
255 .sparseBinding = false,
256 .sparseResidencyBuffer = false,
257 .sparseResidencyImage2D = false,
258 .sparseResidencyImage3D = false,
259 .sparseResidency2Samples = false,
260 .sparseResidency4Samples = false,
261 .sparseResidency8Samples = false,
262 .sparseResidency16Samples = false,
263 .sparseResidencyAliased = false,
264 .variableMultisampleRate = false,
265 .inheritedQueries = false,
266 },
209 }; 267 };
210 const void* first_next = &features2; 268 const void* first_next = &features2;
211 void** next = &features2.pNext; 269 void** next = &features2.pNext;
212 270
213 features2.features = {
214 .robustBufferAccess = false,
215 .fullDrawIndexUint32 = false,
216 .imageCubeArray = true,
217 .independentBlend = true,
218 .geometryShader = true,
219 .tessellationShader = true,
220 .sampleRateShading = false,
221 .dualSrcBlend = false,
222 .logicOp = false,
223 .multiDrawIndirect = false,
224 .drawIndirectFirstInstance = false,
225 .depthClamp = true,
226 .depthBiasClamp = true,
227 .fillModeNonSolid = false,
228 .depthBounds = false,
229 .wideLines = false,
230 .largePoints = true,
231 .alphaToOne = false,
232 .multiViewport = true,
233 .samplerAnisotropy = true,
234 .textureCompressionETC2 = false,
235 .textureCompressionASTC_LDR = is_optimal_astc_supported,
236 .textureCompressionBC = false,
237 .occlusionQueryPrecise = true,
238 .pipelineStatisticsQuery = false,
239 .vertexPipelineStoresAndAtomics = true,
240 .fragmentStoresAndAtomics = true,
241 .shaderTessellationAndGeometryPointSize = false,
242 .shaderImageGatherExtended = true,
243 .shaderStorageImageExtendedFormats = false,
244 .shaderStorageImageMultisample = is_shader_storage_image_multisample,
245 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
246 .shaderStorageImageWriteWithoutFormat = true,
247 .shaderUniformBufferArrayDynamicIndexing = false,
248 .shaderSampledImageArrayDynamicIndexing = false,
249 .shaderStorageBufferArrayDynamicIndexing = false,
250 .shaderStorageImageArrayDynamicIndexing = false,
251 .shaderClipDistance = false,
252 .shaderCullDistance = false,
253 .shaderFloat64 = false,
254 .shaderInt64 = false,
255 .shaderInt16 = false,
256 .shaderResourceResidency = false,
257 .shaderResourceMinLod = false,
258 .sparseBinding = false,
259 .sparseResidencyBuffer = false,
260 .sparseResidencyImage2D = false,
261 .sparseResidencyImage3D = false,
262 .sparseResidency2Samples = false,
263 .sparseResidency4Samples = false,
264 .sparseResidency8Samples = false,
265 .sparseResidency16Samples = false,
266 .sparseResidencyAliased = false,
267 .variableMultisampleRate = false,
268 .inheritedQueries = false,
269 };
270 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ 271 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
271 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, 272 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
272 .pNext = nullptr, 273 .pNext = nullptr,
@@ -379,20 +380,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
379 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 380 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
380 } 381 }
381 382
382 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
383 if (ext_robustness2) {
384 robustness2 = {
385 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
386 .pNext = nullptr,
387 .robustBufferAccess2 = false,
388 .robustImageAccess2 = true,
389 .nullDescriptor = true,
390 };
391 SetNext(next, robustness2);
392 } else {
393 LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
394 }
395
396 if (!ext_depth_range_unrestricted) { 383 if (!ext_depth_range_unrestricted) {
397 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 384 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
398 } 385 }
@@ -535,16 +522,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
535 return (supported_usage & wanted_usage) == wanted_usage; 522 return (supported_usage & wanted_usage) == wanted_usage;
536} 523}
537 524
538void Device::CheckSuitability() const { 525void Device::CheckSuitability(bool requires_swapchain) const {
539 std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; 526 std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
527 bool has_swapchain = false;
540 for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { 528 for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
541 for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { 529 const std::string_view name{property.extensionName};
530 for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
542 if (available_extensions[i]) { 531 if (available_extensions[i]) {
543 continue; 532 continue;
544 } 533 }
545 const std::string_view name{property.extensionName};
546 available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; 534 available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
547 } 535 }
536 has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
548 } 537 }
549 for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { 538 for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
550 if (available_extensions[i]) { 539 if (available_extensions[i]) {
@@ -553,6 +542,11 @@ void Device::CheckSuitability() const {
553 LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); 542 LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
554 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); 543 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
555 } 544 }
545 if (requires_swapchain && !has_swapchain) {
546 LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
547 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
548 }
549
556 struct LimitTuple { 550 struct LimitTuple {
557 u32 minimum; 551 u32 minimum;
558 u32 value; 552 u32 value;
@@ -572,9 +566,20 @@ void Device::CheckSuitability() const {
572 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 566 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
573 } 567 }
574 } 568 }
575 const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; 569 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
570 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
571
572 VkPhysicalDeviceFeatures2 features2{};
573 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
574 features2.pNext = &robustness2;
575
576 physical.GetFeatures2KHR(features2);
577
578 const VkPhysicalDeviceFeatures& features{features2.features};
576 const std::array feature_report{ 579 const std::array feature_report{
580 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
577 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 581 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
582 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
578 std::make_pair(features.imageCubeArray, "imageCubeArray"), 583 std::make_pair(features.imageCubeArray, "imageCubeArray"),
579 std::make_pair(features.independentBlend, "independentBlend"), 584 std::make_pair(features.independentBlend, "independentBlend"),
580 std::make_pair(features.depthClamp, "depthClamp"), 585 std::make_pair(features.depthClamp, "depthClamp"),
@@ -589,6 +594,9 @@ void Device::CheckSuitability() const {
589 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 594 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
590 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 595 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
591 "shaderStorageImageWriteWithoutFormat"), 596 "shaderStorageImageWriteWithoutFormat"),
597 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
598 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
599 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
592 }; 600 };
593 for (const auto& [is_supported, name] : feature_report) { 601 for (const auto& [is_supported, name] : feature_report) {
594 if (is_supported) { 602 if (is_supported) {
@@ -599,17 +607,19 @@ void Device::CheckSuitability() const {
599 } 607 }
600} 608}
601 609
602std::vector<const char*> Device::LoadExtensions() { 610std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
603 std::vector<const char*> extensions; 611 std::vector<const char*> extensions;
604 extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); 612 extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
605 extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); 613 extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
614 if (requires_surface) {
615 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
616 }
606 617
607 bool has_khr_shader_float16_int8{}; 618 bool has_khr_shader_float16_int8{};
608 bool has_ext_subgroup_size_control{}; 619 bool has_ext_subgroup_size_control{};
609 bool has_ext_transform_feedback{}; 620 bool has_ext_transform_feedback{};
610 bool has_ext_custom_border_color{}; 621 bool has_ext_custom_border_color{};
611 bool has_ext_extended_dynamic_state{}; 622 bool has_ext_extended_dynamic_state{};
612 bool has_ext_robustness2{};
613 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 623 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
614 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 624 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
615 bool push) { 625 bool push) {
@@ -637,14 +647,12 @@ std::vector<const char*> Device::LoadExtensions() {
637 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); 647 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
638 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 648 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
639 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 649 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
640 test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
641 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 650 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
642 if (Settings::values.renderer_debug) { 651 if (Settings::values.renderer_debug) {
643 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, 652 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
644 true); 653 true);
645 } 654 }
646 } 655 }
647
648 VkPhysicalDeviceFeatures2KHR features; 656 VkPhysicalDeviceFeatures2KHR features;
649 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; 657 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
650 658
@@ -661,7 +669,6 @@ std::vector<const char*> Device::LoadExtensions() {
661 is_float16_supported = float16_int8_features.shaderFloat16; 669 is_float16_supported = float16_int8_features.shaderFloat16;
662 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 670 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
663 } 671 }
664
665 if (has_ext_subgroup_size_control) { 672 if (has_ext_subgroup_size_control) {
666 VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; 673 VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
667 subgroup_features.sType = 674 subgroup_features.sType =
@@ -688,7 +695,6 @@ std::vector<const char*> Device::LoadExtensions() {
688 } else { 695 } else {
689 is_warp_potentially_bigger = true; 696 is_warp_potentially_bigger = true;
690 } 697 }
691
692 if (has_ext_transform_feedback) { 698 if (has_ext_transform_feedback) {
693 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; 699 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
694 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; 700 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -710,7 +716,6 @@ std::vector<const char*> Device::LoadExtensions() {
710 ext_transform_feedback = true; 716 ext_transform_feedback = true;
711 } 717 }
712 } 718 }
713
714 if (has_ext_custom_border_color) { 719 if (has_ext_custom_border_color) {
715 VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; 720 VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
716 border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; 721 border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
@@ -723,7 +728,6 @@ std::vector<const char*> Device::LoadExtensions() {
723 ext_custom_border_color = true; 728 ext_custom_border_color = true;
724 } 729 }
725 } 730 }
726
727 if (has_ext_extended_dynamic_state) { 731 if (has_ext_extended_dynamic_state) {
728 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; 732 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
729 dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; 733 dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
@@ -736,19 +740,6 @@ std::vector<const char*> Device::LoadExtensions() {
736 ext_extended_dynamic_state = true; 740 ext_extended_dynamic_state = true;
737 } 741 }
738 } 742 }
739
740 if (has_ext_robustness2) {
741 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
742 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
743 robustness2.pNext = nullptr;
744 features.pNext = &robustness2;
745 physical.GetFeatures2KHR(features);
746 if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
747 extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
748 ext_robustness2 = true;
749 }
750 }
751
752 return extensions; 743 return extensions;
753} 744}
754 745
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4b66dba7a..67d70cd22 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer };
23const u32 GuestWarpSize = 32; 23const u32 GuestWarpSize = 32;
24 24
25/// Handles data specific to a physical device. 25/// Handles data specific to a physical device.
26class Device final { 26class Device {
27public: 27public:
28 explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, 28 explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
29 const vk::InstanceDispatch& dld); 29 const vk::InstanceDispatch& dld);
@@ -227,10 +227,10 @@ public:
227 227
228private: 228private:
229 /// Checks if the physical device is suitable. 229 /// Checks if the physical device is suitable.
230 void CheckSuitability() const; 230 void CheckSuitability(bool requires_swapchain) const;
231 231
232 /// Loads extensions into a vector and stores available ones in this object. 232 /// Loads extensions into a vector and stores available ones in this object.
233 std::vector<const char*> LoadExtensions(); 233 std::vector<const char*> LoadExtensions(bool requires_surface);
234 234
235 /// Sets up queue families. 235 /// Sets up queue families.
236 void SetupFamilies(VkSurfaceKHR surface); 236 void SetupFamilies(VkSurfaceKHR surface);
@@ -285,7 +285,6 @@ private:
285 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 285 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
286 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 286 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
287 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 287 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
288 bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
289 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 288 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
290 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 289 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
291 bool has_renderdoc{}; ///< Has RenderDoc attached 290 bool has_renderdoc{}; ///< Has RenderDoc attached
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 889ecda0c..bfd6e6add 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <future>
6#include <optional> 7#include <optional>
7#include <span> 8#include <span>
8#include <utility> 9#include <utility>
@@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
140 VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); 141 VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
141 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); 142 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
142 } 143 }
143 vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); 144 vk::Instance instance =
145 std::async([&] {
146 return vk::Instance::Create(required_version, layers, extensions, dld);
147 }).get();
144 if (!vk::Load(*instance, dld)) { 148 if (!vk::Load(*instance, dld)) {
145 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); 149 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
146 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); 150 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index d6eb3af31..2a8b7a907 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -7,6 +7,8 @@
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9 9
10#include <glad/glad.h>
11
10#include "common/alignment.h" 12#include "common/alignment.h"
11#include "common/assert.h" 13#include "common/assert.h"
12#include "common/common_types.h" 14#include "common/common_types.h"
@@ -55,10 +57,24 @@ struct Range {
55 57
56class MemoryAllocation { 58class MemoryAllocation {
57public: 59public:
58 explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_, 60 explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
59 VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) 61 u64 allocation_size_, u32 type)
60 : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, 62 : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
61 property_flags{properties}, shifted_memory_type{1U << type} {} 63 shifted_memory_type{1U << type} {}
64
65#if defined(_WIN32) || defined(__linux__)
66 ~MemoryAllocation() {
67 if (owning_opengl_handle != 0) {
68 glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
69 }
70 }
71#endif
72
73 MemoryAllocation& operator=(const MemoryAllocation&) = delete;
74 MemoryAllocation(const MemoryAllocation&) = delete;
75
76 MemoryAllocation& operator=(MemoryAllocation&&) = delete;
77 MemoryAllocation(MemoryAllocation&&) = delete;
62 78
63 [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { 79 [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
64 const std::optional<u64> alloc = FindFreeRegion(size, alignment); 80 const std::optional<u64> alloc = FindFreeRegion(size, alignment);
@@ -88,6 +104,31 @@ public:
88 return memory_mapped_span; 104 return memory_mapped_span;
89 } 105 }
90 106
107#ifdef _WIN32
108 [[nodiscard]] u32 ExportOpenGLHandle() {
109 if (!owning_opengl_handle) {
110 glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
111 glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
112 GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
113 memory.GetMemoryWin32HandleKHR());
114 }
115 return owning_opengl_handle;
116 }
117#elif __linux__
118 [[nodiscard]] u32 ExportOpenGLHandle() {
119 if (!owning_opengl_handle) {
120 glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
121 glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
122 memory.GetMemoryFdKHR());
123 }
124 return owning_opengl_handle;
125 }
126#else
127 [[nodiscard]] u32 ExportOpenGLHandle() {
128 return 0;
129 }
130#endif
131
91 /// Returns whether this allocation is compatible with the arguments. 132 /// Returns whether this allocation is compatible with the arguments.
92 [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { 133 [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
93 return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; 134 return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
@@ -118,13 +159,15 @@ private:
118 return candidate; 159 return candidate;
119 } 160 }
120 161
121 const Device& device; ///< Vulkan device.
122 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. 162 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
123 const u64 allocation_size; ///< Size of this allocation. 163 const u64 allocation_size; ///< Size of this allocation.
124 const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. 164 const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
125 const u32 shifted_memory_type; ///< Shifted Vulkan memory type. 165 const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
126 std::vector<Range> commits; ///< All commit ranges done from this allocation. 166 std::vector<Range> commits; ///< All commit ranges done from this allocation.
127 std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. 167 std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
168#if defined(_WIN32) || defined(__linux__)
169 u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
170#endif
128}; 171};
129 172
130MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, 173MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() {
156 return span; 199 return span;
157} 200}
158 201
202u32 MemoryCommit::ExportOpenGLHandle() const {
203 return allocation->ExportOpenGLHandle();
204}
205
159void MemoryCommit::Release() { 206void MemoryCommit::Release() {
160 if (allocation) { 207 if (allocation) {
161 allocation->Free(begin); 208 allocation->Free(begin);
162 } 209 }
163} 210}
164 211
165MemoryAllocator::MemoryAllocator(const Device& device_) 212MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
166 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} 213 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
214 export_allocations{export_allocations_} {}
167 215
168MemoryAllocator::~MemoryAllocator() = default; 216MemoryAllocator::~MemoryAllocator() = default;
169 217
@@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage)
196 244
197void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { 245void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
198 const u32 type = FindType(flags, type_mask).value(); 246 const u32 type = FindType(flags, type_mask).value();
247 const VkExportMemoryAllocateInfo export_allocate_info{
248 .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
249 .pNext = nullptr,
250#ifdef _WIN32
251 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
252#elif __linux__
253 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
254#else
255 .handleTypes = 0,
256#endif
257 };
199 vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ 258 vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
200 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 259 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
201 .pNext = nullptr, 260 .pNext = export_allocations ? &export_allocate_info : nullptr,
202 .allocationSize = size, 261 .allocationSize = size,
203 .memoryTypeIndex = type, 262 .memoryTypeIndex = type,
204 }); 263 });
205 allocations.push_back( 264 allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
206 std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type));
207} 265}
208 266
209std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, 267std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 9e6cfabf9..d1ce29450 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -43,6 +43,9 @@ public:
43 /// It will map the backing allocation if it hasn't been mapped before. 43 /// It will map the backing allocation if it hasn't been mapped before.
44 std::span<u8> Map(); 44 std::span<u8> Map();
45 45
46 /// Returns an non-owning OpenGL handle, creating one if it doesn't exist.
47 u32 ExportOpenGLHandle() const;
48
46 /// Returns the Vulkan memory handler. 49 /// Returns the Vulkan memory handler.
47 VkDeviceMemory Memory() const { 50 VkDeviceMemory Memory() const {
48 return memory; 51 return memory;
@@ -67,7 +70,15 @@ private:
67/// Allocates and releases memory allocations on demand. 70/// Allocates and releases memory allocations on demand.
68class MemoryAllocator { 71class MemoryAllocator {
69public: 72public:
70 explicit MemoryAllocator(const Device& device_); 73 /**
74 * Construct memory allocator
75 *
76 * @param device_ Device to allocate from
77 * @param export_allocations_ True when allocations have to be exported
78 *
79 * @throw vk::Exception on failure
80 */
81 explicit MemoryAllocator(const Device& device_, bool export_allocations_);
71 ~MemoryAllocator(); 82 ~MemoryAllocator();
72 83
73 MemoryAllocator& operator=(const MemoryAllocator&) = delete; 84 MemoryAllocator& operator=(const MemoryAllocator&) = delete;
@@ -106,8 +117,9 @@ private:
106 /// Returns index to the fastest memory type compatible with the passed requirements. 117 /// Returns index to the fastest memory type compatible with the passed requirements.
107 std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; 118 std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
108 119
109 const Device& device; ///< Device handle. 120 const Device& device; ///< Device handle.
110 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. 121 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
122 const bool export_allocations; ///< True when memory allocations have to be exported.
111 std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. 123 std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
112}; 124};
113 125
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 5e15ad607..2aa0ffbe6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
168 X(vkFreeCommandBuffers); 168 X(vkFreeCommandBuffers);
169 X(vkFreeDescriptorSets); 169 X(vkFreeDescriptorSets);
170 X(vkFreeMemory); 170 X(vkFreeMemory);
171 X(vkGetBufferMemoryRequirements); 171 X(vkGetBufferMemoryRequirements2);
172 X(vkGetDeviceQueue); 172 X(vkGetDeviceQueue);
173 X(vkGetEventStatus); 173 X(vkGetEventStatus);
174 X(vkGetFenceStatus); 174 X(vkGetFenceStatus);
175 X(vkGetImageMemoryRequirements); 175 X(vkGetImageMemoryRequirements);
176 X(vkGetMemoryFdKHR);
177#ifdef _WIN32
178 X(vkGetMemoryWin32HandleKHR);
179#endif
176 X(vkGetQueryPoolResults); 180 X(vkGetQueryPoolResults);
177 X(vkGetSemaphoreCounterValueKHR); 181 X(vkGetSemaphoreCounterValueKHR);
178 X(vkMapMemory); 182 X(vkMapMemory);
@@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const {
505 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); 509 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
506} 510}
507 511
512int DeviceMemory::GetMemoryFdKHR() const {
513 const VkMemoryGetFdInfoKHR get_fd_info{
514 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
515 .pNext = nullptr,
516 .memory = handle,
517 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
518 };
519 int fd;
520 Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd));
521 return fd;
522}
523
524#ifdef _WIN32
525HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const {
526 const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{
527 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
528 .pNext = nullptr,
529 .memory = handle,
530 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
531 };
532 HANDLE win32_handle;
533 Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle));
534 return win32_handle;
535}
536#endif
537
508void DeviceMemory::SetObjectNameEXT(const char* name) const { 538void DeviceMemory::SetObjectNameEXT(const char* name) const {
509 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); 539 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
510} 540}
@@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
756 return DeviceMemory(memory, handle, *dld); 786 return DeviceMemory(memory, handle, *dld);
757} 787}
758 788
759VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { 789VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
760 VkMemoryRequirements requirements; 790 void* pnext) const noexcept {
761 dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); 791 const VkBufferMemoryRequirementsInfo2 info{
762 return requirements; 792 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
793 .pNext = nullptr,
794 .buffer = buffer,
795 };
796 VkMemoryRequirements2 requirements{
797 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
798 .pNext = pnext,
799 .memoryRequirements{},
800 };
801 dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
802 return requirements.memoryRequirements;
763} 803}
764 804
765VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { 805VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 9689de0cb..3e36d356a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -15,8 +15,19 @@
15#include <vector> 15#include <vector>
16 16
17#define VK_NO_PROTOTYPES 17#define VK_NO_PROTOTYPES
18#ifdef _WIN32
19#define VK_USE_PLATFORM_WIN32_KHR
20#endif
18#include <vulkan/vulkan.h> 21#include <vulkan/vulkan.h>
19 22
23// Sanitize macros
24#ifdef CreateEvent
25#undef CreateEvent
26#endif
27#ifdef CreateSemaphore
28#undef CreateSemaphore
29#endif
30
20#include "common/common_types.h" 31#include "common/common_types.h"
21 32
22#ifdef _MSC_VER 33#ifdef _MSC_VER
@@ -174,7 +185,7 @@ struct InstanceDispatch {
174}; 185};
175 186
176/// Table holding Vulkan device function pointers. 187/// Table holding Vulkan device function pointers.
177struct DeviceDispatch : public InstanceDispatch { 188struct DeviceDispatch : InstanceDispatch {
178 PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{}; 189 PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
179 PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{}; 190 PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
180 PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{}; 191 PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
@@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch {
272 PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; 283 PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
273 PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; 284 PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
274 PFN_vkFreeMemory vkFreeMemory{}; 285 PFN_vkFreeMemory vkFreeMemory{};
275 PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; 286 PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
276 PFN_vkGetDeviceQueue vkGetDeviceQueue{}; 287 PFN_vkGetDeviceQueue vkGetDeviceQueue{};
277 PFN_vkGetEventStatus vkGetEventStatus{}; 288 PFN_vkGetEventStatus vkGetEventStatus{};
278 PFN_vkGetFenceStatus vkGetFenceStatus{}; 289 PFN_vkGetFenceStatus vkGetFenceStatus{};
279 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; 290 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
291 PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
292#ifdef _WIN32
293 PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
294#endif
280 PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; 295 PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
281 PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; 296 PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
282 PFN_vkMapMemory vkMapMemory{}; 297 PFN_vkMapMemory vkMapMemory{};
@@ -344,6 +359,9 @@ public:
344 /// Construct an empty handle. 359 /// Construct an empty handle.
345 Handle() = default; 360 Handle() = default;
346 361
362 /// Construct an empty handle.
363 Handle(std::nullptr_t) {}
364
347 /// Copying Vulkan objects is not supported and will never be. 365 /// Copying Vulkan objects is not supported and will never be.
348 Handle(const Handle&) = delete; 366 Handle(const Handle&) = delete;
349 Handle& operator=(const Handle&) = delete; 367 Handle& operator=(const Handle&) = delete;
@@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
659 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; 677 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
660 678
661public: 679public:
680 int GetMemoryFdKHR() const;
681
682#ifdef _WIN32
683 HANDLE GetMemoryWin32HandleKHR() const;
684#endif
685
662 /// Set object name. 686 /// Set object name.
663 void SetObjectNameEXT(const char* name) const; 687 void SetObjectNameEXT(const char* name) const;
664 688
@@ -847,7 +871,8 @@ public:
847 871
848 DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; 872 DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
849 873
850 VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; 874 VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
875 void* pnext = nullptr) const noexcept;
851 876
852 VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; 877 VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
853 878
@@ -1033,6 +1058,12 @@ public:
1033 1058
1034 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, 1059 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1035 VkDependencyFlags dependency_flags, 1060 VkDependencyFlags dependency_flags,
1061 const VkMemoryBarrier& memory_barrier) const noexcept {
1062 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {});
1063 }
1064
1065 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1066 VkDependencyFlags dependency_flags,
1036 const VkBufferMemoryBarrier& buffer_barrier) const noexcept { 1067 const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
1037 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); 1068 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
1038 } 1069 }
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index fb9967c8f..b025ced1c 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -151,6 +151,7 @@ add_executable(yuzu
151 util/util.h 151 util/util.h
152 compatdb.cpp 152 compatdb.cpp
153 compatdb.h 153 compatdb.h
154 yuzu.qrc
154 yuzu.rc 155 yuzu.rc
155) 156)
156 157
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp
index c680fd2c2..b92cd6886 100644
--- a/src/yuzu/applets/controller.cpp
+++ b/src/yuzu/applets/controller.cpp
@@ -67,6 +67,8 @@ bool IsControllerCompatible(Settings::ControllerType controller_type,
67 return parameters.allow_right_joycon; 67 return parameters.allow_right_joycon;
68 case Settings::ControllerType::Handheld: 68 case Settings::ControllerType::Handheld:
69 return parameters.enable_single_mode && parameters.allow_handheld; 69 return parameters.enable_single_mode && parameters.allow_handheld;
70 case Settings::ControllerType::GameCube:
71 return parameters.allow_gamecube_controller;
70 default: 72 default:
71 return false; 73 return false;
72 } 74 }
@@ -370,7 +372,7 @@ void QtControllerSelectorDialog::SetSupportedControllers() {
370 QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme)); 372 QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme));
371 } 373 }
372 374
373 if (parameters.allow_pro_controller) { 375 if (parameters.allow_pro_controller || parameters.allow_gamecube_controller) {
374 ui->controllerSupported5->setStyleSheet( 376 ui->controllerSupported5->setStyleSheet(
375 QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme)); 377 QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme));
376 } else { 378 } else {
@@ -420,6 +422,10 @@ void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index
420 Settings::ControllerType::Handheld); 422 Settings::ControllerType::Handheld);
421 emulated_controllers[player_index]->addItem(tr("Handheld")); 423 emulated_controllers[player_index]->addItem(tr("Handheld"));
422 } 424 }
425
426 pairs.emplace_back(emulated_controllers[player_index]->count(),
427 Settings::ControllerType::GameCube);
428 emulated_controllers[player_index]->addItem(tr("GameCube Controller"));
423} 429}
424 430
425Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex( 431Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex(
@@ -461,6 +467,7 @@ void QtControllerSelectorDialog::UpdateControllerIcon(std::size_t player_index)
461 switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(), 467 switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(),
462 player_index)) { 468 player_index)) {
463 case Settings::ControllerType::ProController: 469 case Settings::ControllerType::ProController:
470 case Settings::ControllerType::GameCube:
464 return QStringLiteral("image: url(:/controller/applet_pro_controller%0); "); 471 return QStringLiteral("image: url(:/controller/applet_pro_controller%0); ");
465 case Settings::ControllerType::DualJoyconDetached: 472 case Settings::ControllerType::DualJoyconDetached:
466 return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); "); 473 return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); ");
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index ffdf34a4a..1c61d419d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -64,7 +64,7 @@ void EmuThread::run() {
64 64
65 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); 65 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
66 66
67 system.Renderer().Rasterizer().LoadDiskResources( 67 system.Renderer().ReadRasterizer()->LoadDiskResources(
68 system.CurrentProcess()->GetTitleID(), stop_run, 68 system.CurrentProcess()->GetTitleID(), stop_run,
69 [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { 69 [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
70 emit LoadProgress(stage, value, total); 70 emit LoadProgress(stage, value, total);
@@ -405,12 +405,17 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
405 if (event->source() == Qt::MouseEventSynthesizedBySystem) { 405 if (event->source() == Qt::MouseEventSynthesizedBySystem) {
406 return; 406 return;
407 } 407 }
408
409 auto pos = event->pos(); 408 auto pos = event->pos();
410 const auto [x, y] = ScaleTouch(pos); 409 const auto [x, y] = ScaleTouch(pos);
411 input_subsystem->GetMouse()->MouseMove(x, y); 410 const int center_x = width() / 2;
411 const int center_y = height() / 2;
412 input_subsystem->GetMouse()->MouseMove(x, y, center_x, center_y);
412 this->TouchMoved(x, y, 0); 413 this->TouchMoved(x, y, 0);
413 414
415 if (Settings::values.mouse_panning) {
416 QCursor::setPos(mapToGlobal({center_x, center_y}));
417 }
418
414 emit MouseActivity(); 419 emit MouseActivity();
415} 420}
416 421
@@ -714,6 +719,11 @@ void GRenderWindow::showEvent(QShowEvent* event) {
714 719
715bool GRenderWindow::eventFilter(QObject* object, QEvent* event) { 720bool GRenderWindow::eventFilter(QObject* object, QEvent* event) {
716 if (event->type() == QEvent::HoverMove) { 721 if (event->type() == QEvent::HoverMove) {
722 if (Settings::values.mouse_panning) {
723 auto* hover_event = static_cast<QMouseEvent*>(event);
724 mouseMoveEvent(hover_event);
725 return false;
726 }
717 emit MouseActivity(); 727 emit MouseActivity();
718 } 728 }
719 return false; 729 return false;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8d85a1986..3d6f64300 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -220,7 +220,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
220// This must be in alphabetical order according to action name as it must have the same order as 220// This must be in alphabetical order according to action name as it must have the same order as
221// UISetting::values.shortcuts, which is alphabetically ordered. 221// UISetting::values.shortcuts, which is alphabetically ordered.
222// clang-format off 222// clang-format off
223const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{ 223const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
224 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}}, 224 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}},
225 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, 225 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
226 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, 226 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -235,6 +235,7 @@ const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
235 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, 235 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
236 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, 236 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
237 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, 237 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
238 {QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("F9"), Qt::ApplicationShortcut}},
238 {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}}, 239 {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}},
239 {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}}, 240 {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}},
240}}; 241}};
@@ -507,6 +508,9 @@ void Config::ReadControlValues() {
507 508
508 Settings::values.emulate_analog_keyboard = 509 Settings::values.emulate_analog_keyboard =
509 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); 510 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
511 Settings::values.mouse_panning = ReadSetting(QStringLiteral("mouse_panning"), false).toBool();
512 Settings::values.mouse_panning_sensitivity =
513 ReadSetting(QStringLiteral("mouse_panning_sensitivity"), 1).toFloat();
510 514
511 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true); 515 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
512 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), 516 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
@@ -610,12 +614,6 @@ void Config::ReadDataStorageValues() {
610 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir))) 614 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)))
611 .toString() 615 .toString()
612 .toStdString()); 616 .toStdString());
613 FS::GetUserPath(FS::UserPath::CacheDir,
614 qt_config
615 ->value(QStringLiteral("cache_directory"),
616 QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)))
617 .toString()
618 .toStdString());
619 Settings::values.gamecard_inserted = 617 Settings::values.gamecard_inserted =
620 ReadSetting(QStringLiteral("gamecard_inserted"), false).toBool(); 618 ReadSetting(QStringLiteral("gamecard_inserted"), false).toBool();
621 Settings::values.gamecard_current_game = 619 Settings::values.gamecard_current_game =
@@ -778,14 +776,14 @@ void Config::ReadRendererValues() {
778 ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); 776 ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100);
779 ReadSettingGlobal(Settings::values.use_disk_shader_cache, 777 ReadSettingGlobal(Settings::values.use_disk_shader_cache,
780 QStringLiteral("use_disk_shader_cache"), true); 778 QStringLiteral("use_disk_shader_cache"), true);
781 ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); 779 ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
782 ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, 780 ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
783 QStringLiteral("use_asynchronous_gpu_emulation"), true); 781 QStringLiteral("use_asynchronous_gpu_emulation"), true);
784 ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), 782 ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
785 true); 783 true);
786 ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); 784 ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
787 ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), 785 ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
788 true); 786 false);
789 ReadSettingGlobal(Settings::values.use_asynchronous_shaders, 787 ReadSettingGlobal(Settings::values.use_asynchronous_shaders,
790 QStringLiteral("use_asynchronous_shaders"), false); 788 QStringLiteral("use_asynchronous_shaders"), false);
791 ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), 789 ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
@@ -1184,7 +1182,9 @@ void Config::SaveControlValues() {
1184 WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false); 1182 WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false);
1185 WriteSetting(QStringLiteral("emulate_analog_keyboard"), 1183 WriteSetting(QStringLiteral("emulate_analog_keyboard"),
1186 Settings::values.emulate_analog_keyboard, false); 1184 Settings::values.emulate_analog_keyboard, false);
1187 1185 WriteSetting(QStringLiteral("mouse_panning"), Settings::values.mouse_panning, false);
1186 WriteSetting(QStringLiteral("mouse_panning_sensitivity"),
1187 Settings::values.mouse_panning_sensitivity, 1.0f);
1188 qt_config->endGroup(); 1188 qt_config->endGroup();
1189} 1189}
1190 1190
@@ -1212,9 +1212,6 @@ void Config::SaveDataStorageValues() {
1212 WriteSetting(QStringLiteral("dump_directory"), 1212 WriteSetting(QStringLiteral("dump_directory"),
1213 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)), 1213 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)),
1214 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir))); 1214 QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)));
1215 WriteSetting(QStringLiteral("cache_directory"),
1216 QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)),
1217 QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)));
1218 WriteSetting(QStringLiteral("gamecard_inserted"), Settings::values.gamecard_inserted, false); 1215 WriteSetting(QStringLiteral("gamecard_inserted"), Settings::values.gamecard_inserted, false);
1219 WriteSetting(QStringLiteral("gamecard_current_game"), Settings::values.gamecard_current_game, 1216 WriteSetting(QStringLiteral("gamecard_current_game"), Settings::values.gamecard_current_game,
1220 false); 1217 false);
@@ -1345,14 +1342,14 @@ void Config::SaveRendererValues() {
1345 Settings::values.use_disk_shader_cache, true); 1342 Settings::values.use_disk_shader_cache, true);
1346 WriteSettingGlobal(QStringLiteral("gpu_accuracy"), 1343 WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
1347 static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), 1344 static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
1348 Settings::values.gpu_accuracy.UsingGlobal(), 0); 1345 Settings::values.gpu_accuracy.UsingGlobal(), 1);
1349 WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), 1346 WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
1350 Settings::values.use_asynchronous_gpu_emulation, true); 1347 Settings::values.use_asynchronous_gpu_emulation, true);
1351 WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, 1348 WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
1352 true); 1349 true);
1353 WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); 1350 WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
1354 WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), 1351 WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
1355 Settings::values.use_assembly_shaders, true); 1352 Settings::values.use_assembly_shaders, false);
1356 WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), 1353 WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"),
1357 Settings::values.use_asynchronous_shaders, false); 1354 Settings::values.use_asynchronous_shaders, false);
1358 WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, 1355 WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 8a600e19d..949c4eb13 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,7 +42,7 @@ public:
42 default_mouse_buttons; 42 default_mouse_buttons;
43 static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys; 43 static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
44 static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; 44 static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
45 static const std::array<UISettings::Shortcut, 16> default_hotkeys; 45 static const std::array<UISettings::Shortcut, 17> default_hotkeys;
46 46
47private: 47private:
48 void Initialize(const std::string& config_name); 48 void Initialize(const std::string& config_name);
diff --git a/src/yuzu/configuration/configure_filesystem.cpp b/src/yuzu/configuration/configure_filesystem.cpp
index 7ab4a80f7..bde2d4620 100644
--- a/src/yuzu/configuration/configure_filesystem.cpp
+++ b/src/yuzu/configuration/configure_filesystem.cpp
@@ -26,8 +26,6 @@ ConfigureFilesystem::ConfigureFilesystem(QWidget* parent)
26 [this] { SetDirectory(DirectoryTarget::Dump, ui->dump_path_edit); }); 26 [this] { SetDirectory(DirectoryTarget::Dump, ui->dump_path_edit); });
27 connect(ui->load_path_button, &QToolButton::pressed, this, 27 connect(ui->load_path_button, &QToolButton::pressed, this,
28 [this] { SetDirectory(DirectoryTarget::Load, ui->load_path_edit); }); 28 [this] { SetDirectory(DirectoryTarget::Load, ui->load_path_edit); });
29 connect(ui->cache_directory_button, &QToolButton::pressed, this,
30 [this] { SetDirectory(DirectoryTarget::Cache, ui->cache_directory_edit); });
31 29
32 connect(ui->reset_game_list_cache, &QPushButton::pressed, this, 30 connect(ui->reset_game_list_cache, &QPushButton::pressed, this,
33 &ConfigureFilesystem::ResetMetadata); 31 &ConfigureFilesystem::ResetMetadata);
@@ -50,8 +48,6 @@ void ConfigureFilesystem::setConfiguration() {
50 QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::DumpDir))); 48 QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::DumpDir)));
51 ui->load_path_edit->setText( 49 ui->load_path_edit->setText(
52 QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::LoadDir))); 50 QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::LoadDir)));
53 ui->cache_directory_edit->setText(
54 QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::CacheDir)));
55 51
56 ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted); 52 ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted);
57 ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game); 53 ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game);
@@ -72,9 +68,6 @@ void ConfigureFilesystem::applyConfiguration() {
72 ui->dump_path_edit->text().toStdString()); 68 ui->dump_path_edit->text().toStdString());
73 Common::FS::GetUserPath(Common::FS::UserPath::LoadDir, 69 Common::FS::GetUserPath(Common::FS::UserPath::LoadDir,
74 ui->load_path_edit->text().toStdString()); 70 ui->load_path_edit->text().toStdString());
75 Common::FS::GetUserPath(Common::FS::UserPath::CacheDir,
76 ui->cache_directory_edit->text().toStdString());
77 Settings::values.gamecard_path = ui->gamecard_path_edit->text().toStdString();
78 71
79 Settings::values.gamecard_inserted = ui->gamecard_inserted->isChecked(); 72 Settings::values.gamecard_inserted = ui->gamecard_inserted->isChecked();
80 Settings::values.gamecard_current_game = ui->gamecard_current_game->isChecked(); 73 Settings::values.gamecard_current_game = ui->gamecard_current_game->isChecked();
@@ -103,9 +96,6 @@ void ConfigureFilesystem::SetDirectory(DirectoryTarget target, QLineEdit* edit)
103 case DirectoryTarget::Load: 96 case DirectoryTarget::Load:
104 caption = tr("Select Mod Load Directory..."); 97 caption = tr("Select Mod Load Directory...");
105 break; 98 break;
106 case DirectoryTarget::Cache:
107 caption = tr("Select Cache Directory...");
108 break;
109 } 99 }
110 100
111 QString str; 101 QString str;
diff --git a/src/yuzu/configuration/configure_filesystem.h b/src/yuzu/configuration/configure_filesystem.h
index a79303760..2147cd405 100644
--- a/src/yuzu/configuration/configure_filesystem.h
+++ b/src/yuzu/configuration/configure_filesystem.h
@@ -32,7 +32,6 @@ private:
32 Gamecard, 32 Gamecard,
33 Dump, 33 Dump,
34 Load, 34 Load,
35 Cache,
36 }; 35 };
37 36
38 void SetDirectory(DirectoryTarget target, QLineEdit* edit); 37 void SetDirectory(DirectoryTarget target, QLineEdit* edit);
diff --git a/src/yuzu/configuration/configure_filesystem.ui b/src/yuzu/configuration/configure_filesystem.ui
index 84bea0600..62b9abc7a 100644
--- a/src/yuzu/configuration/configure_filesystem.ui
+++ b/src/yuzu/configuration/configure_filesystem.ui
@@ -198,40 +198,7 @@
198 <string>Caching</string> 198 <string>Caching</string>
199 </property> 199 </property>
200 <layout class="QGridLayout" name="gridLayout_5"> 200 <layout class="QGridLayout" name="gridLayout_5">
201 <item row="0" column="0"> 201 <item row="0" column="0" colspan="2">
202 <widget class="QLabel" name="label_10">
203 <property name="text">
204 <string>Cache Directory</string>
205 </property>
206 </widget>
207 </item>
208 <item row="0" column="1">
209 <spacer name="horizontalSpacer_3">
210 <property name="orientation">
211 <enum>Qt::Horizontal</enum>
212 </property>
213 <property name="sizeType">
214 <enum>QSizePolicy::Fixed</enum>
215 </property>
216 <property name="sizeHint" stdset="0">
217 <size>
218 <width>40</width>
219 <height>20</height>
220 </size>
221 </property>
222 </spacer>
223 </item>
224 <item row="0" column="2">
225 <widget class="QLineEdit" name="cache_directory_edit"/>
226 </item>
227 <item row="0" column="3">
228 <widget class="QToolButton" name="cache_directory_button">
229 <property name="text">
230 <string>...</string>
231 </property>
232 </widget>
233 </item>
234 <item row="1" column="0" colspan="4">
235 <layout class="QHBoxLayout" name="horizontalLayout_2"> 202 <layout class="QHBoxLayout" name="horizontalLayout_2">
236 <item> 203 <item>
237 <widget class="QCheckBox" name="cache_game_list"> 204 <widget class="QCheckBox" name="cache_game_list">
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index b78a5dff0..9ff32aec4 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5// Include this early to include Vulkan headers how we want to
6#include "video_core/vulkan_common/vulkan_wrapper.h"
7
5#include <QColorDialog> 8#include <QColorDialog>
6#include <QComboBox> 9#include <QComboBox>
7#include <QVulkanInstance> 10#include <QVulkanInstance>
@@ -11,7 +14,8 @@
11#include "core/core.h" 14#include "core/core.h"
12#include "core/settings.h" 15#include "core/settings.h"
13#include "ui_configure_graphics.h" 16#include "ui_configure_graphics.h"
14#include "video_core/renderer_vulkan/renderer_vulkan.h" 17#include "video_core/vulkan_common/vulkan_instance.h"
18#include "video_core/vulkan_common/vulkan_library.h"
15#include "yuzu/configuration/configuration_shared.h" 19#include "yuzu/configuration/configuration_shared.h"
16#include "yuzu/configuration/configure_graphics.h" 20#include "yuzu/configuration/configure_graphics.h"
17 21
@@ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
212 ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); 216 ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
213} 217}
214 218
215void ConfigureGraphics::RetrieveVulkanDevices() { 219void ConfigureGraphics::RetrieveVulkanDevices() try {
220 using namespace Vulkan;
221
222 vk::InstanceDispatch dld;
223 const Common::DynamicLibrary library = OpenLibrary();
224 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
225 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
226
216 vulkan_devices.clear(); 227 vulkan_devices.clear();
217 for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { 228 vulkan_devices.reserve(physical_devices.size());
229 for (const VkPhysicalDevice device : physical_devices) {
230 const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName;
218 vulkan_devices.push_back(QString::fromStdString(name)); 231 vulkan_devices.push_back(QString::fromStdString(name));
219 } 232 }
233
234} catch (const Vulkan::vk::Exception& exception) {
235 LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
220} 236}
221 237
222Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { 238Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp
index 4e557bc6f..a1a0eb676 100644
--- a/src/yuzu/configuration/configure_input_advanced.cpp
+++ b/src/yuzu/configuration/configure_input_advanced.cpp
@@ -122,6 +122,9 @@ void ConfigureInputAdvanced::ApplyConfiguration() {
122 Settings::values.mouse_enabled = ui->mouse_enabled->isChecked(); 122 Settings::values.mouse_enabled = ui->mouse_enabled->isChecked();
123 Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked(); 123 Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked();
124 Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked(); 124 Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked();
125 Settings::values.mouse_panning = ui->mouse_panning->isChecked();
126 Settings::values.mouse_panning_sensitivity =
127 static_cast<float>(ui->mouse_panning_sensitivity->value());
125 Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked(); 128 Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked();
126} 129}
127 130
@@ -149,6 +152,8 @@ void ConfigureInputAdvanced::LoadConfiguration() {
149 ui->mouse_enabled->setChecked(Settings::values.mouse_enabled); 152 ui->mouse_enabled->setChecked(Settings::values.mouse_enabled);
150 ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled); 153 ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled);
151 ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard); 154 ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard);
155 ui->mouse_panning->setChecked(Settings::values.mouse_panning);
156 ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity);
152 ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled); 157 ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
153 158
154 UpdateUIEnabled(); 159 UpdateUIEnabled();
diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui
index f207e5d3b..173130d8d 100644
--- a/src/yuzu/configuration/configure_input_advanced.ui
+++ b/src/yuzu/configuration/configure_input_advanced.ui
@@ -2546,27 +2546,65 @@
2546 </property> 2546 </property>
2547 </widget> 2547 </widget>
2548 </item> 2548 </item>
2549 <item row="1" column="0"> 2549 <item row="1" column="0">
2550 <widget class="QCheckBox" name="emulate_analog_keyboard"> 2550 <widget class="QCheckBox" name="emulate_analog_keyboard">
2551 <property name="minimumSize"> 2551 <property name="minimumSize">
2552 <size> 2552 <size>
2553 <width>0</width> 2553 <width>0</width>
2554 <height>23</height> 2554 <height>23</height>
2555 </size> 2555 </size>
2556 </property> 2556 </property>
2557 <property name="text"> 2557 <property name="text">
2558 <string>Emulate Analog with Keyboard Input</string> 2558 <string>Emulate Analog with Keyboard Input</string>
2559 </property> 2559 </property>
2560 </widget> 2560 </widget>
2561 </item> 2561 </item>
2562 <item row="5" column="2"> 2562 <item row="2" column="0">
2563 <widget class="QCheckBox" name="mouse_panning">
2564 <property name="minimumSize">
2565 <size>
2566 <width>0</width>
2567 <height>23</height>
2568 </size>
2569 </property>
2570 <property name="text">
2571 <string>Enable mouse panning</string>
2572 </property>
2573 </widget>
2574 </item>
2575 <item row="2" column="2">
2576 <widget class="QDoubleSpinBox" name="mouse_panning_sensitivity">
2577 <property name="toolTip">
2578 <string>Mouse sensitivity</string>
2579 </property>
2580 <property name="alignment">
2581 <set>Qt::AlignCenter</set>
2582 </property>
2583 <property name="decimals">
2584 <number>2</number>
2585 </property>
2586 <property name="minimum">
2587 <double>0.100000000000000</double>
2588 </property>
2589 <property name="maximum">
2590 <double>16.000000000000000</double>
2591 </property>
2592 <property name="singleStep">
2593 <double>0.010000000000000</double>
2594 </property>
2595 <property name="value">
2596 <double>1.000000000000000</double>
2597 </property>
2598 </widget>
2599 </item>
2600 <item row="6" column="2">
2563 <widget class="QPushButton" name="touchscreen_advanced"> 2601 <widget class="QPushButton" name="touchscreen_advanced">
2564 <property name="text"> 2602 <property name="text">
2565 <string>Advanced</string> 2603 <string>Advanced</string>
2566 </property> 2604 </property>
2567 </widget> 2605 </widget>
2568 </item> 2606 </item>
2569 <item row="2" column="1"> 2607 <item row="3" column="1">
2570 <spacer name="horizontalSpacer_8"> 2608 <spacer name="horizontalSpacer_8">
2571 <property name="orientation"> 2609 <property name="orientation">
2572 <enum>Qt::Horizontal</enum> 2610 <enum>Qt::Horizontal</enum>
@@ -2582,21 +2620,21 @@
2582 </property> 2620 </property>
2583 </spacer> 2621 </spacer>
2584 </item> 2622 </item>
2585 <item row="2" column="2"> 2623 <item row="3" column="2">
2586 <widget class="QPushButton" name="mouse_advanced"> 2624 <widget class="QPushButton" name="mouse_advanced">
2587 <property name="text"> 2625 <property name="text">
2588 <string>Advanced</string> 2626 <string>Advanced</string>
2589 </property> 2627 </property>
2590 </widget> 2628 </widget>
2591 </item> 2629 </item>
2592 <item row="5" column="0"> 2630 <item row="6" column="0">
2593 <widget class="QCheckBox" name="touchscreen_enabled"> 2631 <widget class="QCheckBox" name="touchscreen_enabled">
2594 <property name="text"> 2632 <property name="text">
2595 <string>Touchscreen</string> 2633 <string>Touchscreen</string>
2596 </property> 2634 </property>
2597 </widget> 2635 </widget>
2598 </item> 2636 </item>
2599 <item row="2" column="0"> 2637 <item row="3" column="0">
2600 <widget class="QCheckBox" name="mouse_enabled"> 2638 <widget class="QCheckBox" name="mouse_enabled">
2601 <property name="minimumSize"> 2639 <property name="minimumSize">
2602 <size> 2640 <size>
@@ -2609,28 +2647,28 @@
2609 </property> 2647 </property>
2610 </widget> 2648 </widget>
2611 </item> 2649 </item>
2612 <item row="7" column="0"> 2650 <item row="8" column="0">
2613 <widget class="QLabel" name="motion_touch"> 2651 <widget class="QLabel" name="motion_touch">
2614 <property name="text"> 2652 <property name="text">
2615 <string>Motion / Touch</string> 2653 <string>Motion / Touch</string>
2616 </property> 2654 </property>
2617 </widget> 2655 </widget>
2618 </item> 2656 </item>
2619 <item row="7" column="2"> 2657 <item row="8" column="2">
2620 <widget class="QPushButton" name="buttonMotionTouch"> 2658 <widget class="QPushButton" name="buttonMotionTouch">
2621 <property name="text"> 2659 <property name="text">
2622 <string>Configure</string> 2660 <string>Configure</string>
2623 </property> 2661 </property>
2624 </widget> 2662 </widget>
2625 </item> 2663 </item>
2626 <item row="6" column="0"> 2664 <item row="7" column="0">
2627 <widget class="QCheckBox" name="debug_enabled"> 2665 <widget class="QCheckBox" name="debug_enabled">
2628 <property name="text"> 2666 <property name="text">
2629 <string>Debug Controller</string> 2667 <string>Debug Controller</string>
2630 </property> 2668 </property>
2631 </widget> 2669 </widget>
2632 </item> 2670 </item>
2633 <item row="6" column="2"> 2671 <item row="7" column="2">
2634 <widget class="QPushButton" name="debug_configure"> 2672 <widget class="QPushButton" name="debug_configure">
2635 <property name="text"> 2673 <property name="text">
2636 <string>Configure</string> 2674 <string>Configure</string>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index c9d19c948..21d0d3449 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -467,10 +467,14 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
467 467
468 UpdateControllerIcon(); 468 UpdateControllerIcon();
469 UpdateControllerAvailableButtons(); 469 UpdateControllerAvailableButtons();
470 UpdateControllerEnabledButtons();
471 UpdateControllerButtonNames();
470 UpdateMotionButtons(); 472 UpdateMotionButtons();
471 connect(ui->comboControllerType, qOverload<int>(&QComboBox::currentIndexChanged), [this](int) { 473 connect(ui->comboControllerType, qOverload<int>(&QComboBox::currentIndexChanged), [this](int) {
472 UpdateControllerIcon(); 474 UpdateControllerIcon();
473 UpdateControllerAvailableButtons(); 475 UpdateControllerAvailableButtons();
476 UpdateControllerEnabledButtons();
477 UpdateControllerButtonNames();
474 UpdateMotionButtons(); 478 UpdateMotionButtons();
475 }); 479 });
476 480
@@ -558,9 +562,6 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
558 &ConfigureInputPlayer::SaveProfile); 562 &ConfigureInputPlayer::SaveProfile);
559 563
560 LoadConfiguration(); 564 LoadConfiguration();
561
562 // TODO(wwylele): enable this when we actually emulate it
563 ui->buttonHome->setEnabled(false);
564 ui->controllerFrame->SetPlayerInput(player_index, buttons_param, analogs_param); 565 ui->controllerFrame->SetPlayerInput(player_index, buttons_param, analogs_param);
565 ui->controllerFrame->SetConnectedStatus(ui->groupConnectedController->isChecked()); 566 ui->controllerFrame->SetConnectedStatus(ui->groupConnectedController->isChecked());
566} 567}
@@ -924,6 +925,12 @@ void ConfigureInputPlayer::SetConnectableControllers() {
924 Settings::ControllerType::Handheld); 925 Settings::ControllerType::Handheld);
925 ui->comboControllerType->addItem(tr("Handheld")); 926 ui->comboControllerType->addItem(tr("Handheld"));
926 } 927 }
928
929 if (enable_all || npad_style_set.gamecube == 1) {
930 index_controller_type_pairs.emplace_back(ui->comboControllerType->count(),
931 Settings::ControllerType::GameCube);
932 ui->comboControllerType->addItem(tr("GameCube Controller"));
933 }
927 }; 934 };
928 935
929 Core::System& system{Core::System::GetInstance()}; 936 Core::System& system{Core::System::GetInstance()};
@@ -1014,7 +1021,7 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
1014 1021
1015 // List of all the widgets that will be hidden by any of the following layouts that need 1022 // List of all the widgets that will be hidden by any of the following layouts that need
1016 // "unhidden" after the controller type changes 1023 // "unhidden" after the controller type changes
1017 const std::array<QWidget*, 9> layout_show = { 1024 const std::array<QWidget*, 11> layout_show = {
1018 ui->buttonShoulderButtonsSLSR, 1025 ui->buttonShoulderButtonsSLSR,
1019 ui->horizontalSpacerShoulderButtonsWidget, 1026 ui->horizontalSpacerShoulderButtonsWidget,
1020 ui->horizontalSpacerShoulderButtonsWidget2, 1027 ui->horizontalSpacerShoulderButtonsWidget2,
@@ -1024,6 +1031,8 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
1024 ui->buttonShoulderButtonsRight, 1031 ui->buttonShoulderButtonsRight,
1025 ui->buttonMiscButtonsPlusHome, 1032 ui->buttonMiscButtonsPlusHome,
1026 ui->bottomRight, 1033 ui->bottomRight,
1034 ui->buttonMiscButtonsMinusGroup,
1035 ui->buttonMiscButtonsScreenshotGroup,
1027 }; 1036 };
1028 1037
1029 for (auto* widget : layout_show) { 1038 for (auto* widget : layout_show) {
@@ -1056,6 +1065,14 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
1056 ui->bottomLeft, 1065 ui->bottomLeft,
1057 }; 1066 };
1058 break; 1067 break;
1068 case Settings::ControllerType::GameCube:
1069 layout_hidden = {
1070 ui->buttonShoulderButtonsSLSR,
1071 ui->horizontalSpacerShoulderButtonsWidget2,
1072 ui->buttonMiscButtonsMinusGroup,
1073 ui->buttonMiscButtonsScreenshotGroup,
1074 };
1075 break;
1059 } 1076 }
1060 1077
1061 for (auto* widget : layout_hidden) { 1078 for (auto* widget : layout_hidden) {
@@ -1063,6 +1080,52 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() {
1063 } 1080 }
1064} 1081}
1065 1082
1083void ConfigureInputPlayer::UpdateControllerEnabledButtons() {
1084 auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
1085 if (debug) {
1086 layout = Settings::ControllerType::ProController;
1087 }
1088
1089 // List of all the widgets that will be disabled by any of the following layouts that need
1090 // "enabled" after the controller type changes
1091 const std::array<QWidget*, 4> layout_enable = {
1092 ui->buttonHome,
1093 ui->buttonLStickPressedGroup,
1094 ui->groupRStickPressed,
1095 ui->buttonShoulderButtonsButtonLGroup,
1096 };
1097
1098 for (auto* widget : layout_enable) {
1099 widget->setEnabled(true);
1100 }
1101
1102 std::vector<QWidget*> layout_disable;
1103 switch (layout) {
1104 case Settings::ControllerType::ProController:
1105 case Settings::ControllerType::DualJoyconDetached:
1106 case Settings::ControllerType::Handheld:
1107 case Settings::ControllerType::LeftJoycon:
1108 case Settings::ControllerType::RightJoycon:
1109 // TODO(wwylele): enable this when we actually emulate it
1110 layout_disable = {
1111 ui->buttonHome,
1112 };
1113 break;
1114 case Settings::ControllerType::GameCube:
1115 layout_disable = {
1116 ui->buttonHome,
1117 ui->buttonLStickPressedGroup,
1118 ui->groupRStickPressed,
1119 ui->buttonShoulderButtonsButtonLGroup,
1120 };
1121 break;
1122 }
1123
1124 for (auto* widget : layout_disable) {
1125 widget->setEnabled(false);
1126 }
1127}
1128
1066void ConfigureInputPlayer::UpdateMotionButtons() { 1129void ConfigureInputPlayer::UpdateMotionButtons() {
1067 if (debug) { 1130 if (debug) {
1068 // Motion isn't used with the debug controller, hide both groupboxes. 1131 // Motion isn't used with the debug controller, hide both groupboxes.
@@ -1085,6 +1148,11 @@ void ConfigureInputPlayer::UpdateMotionButtons() {
1085 ui->buttonMotionLeftGroup->hide(); 1148 ui->buttonMotionLeftGroup->hide();
1086 ui->buttonMotionRightGroup->show(); 1149 ui->buttonMotionRightGroup->show();
1087 break; 1150 break;
1151 case Settings::ControllerType::GameCube:
1152 // Hide both "Motion 1/2".
1153 ui->buttonMotionLeftGroup->hide();
1154 ui->buttonMotionRightGroup->hide();
1155 break;
1088 case Settings::ControllerType::DualJoyconDetached: 1156 case Settings::ControllerType::DualJoyconDetached:
1089 default: 1157 default:
1090 // Show both "Motion 1/2". 1158 // Show both "Motion 1/2".
@@ -1094,6 +1162,36 @@ void ConfigureInputPlayer::UpdateMotionButtons() {
1094 } 1162 }
1095} 1163}
1096 1164
1165void ConfigureInputPlayer::UpdateControllerButtonNames() {
1166 auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
1167 if (debug) {
1168 layout = Settings::ControllerType::ProController;
1169 }
1170
1171 switch (layout) {
1172 case Settings::ControllerType::ProController:
1173 case Settings::ControllerType::DualJoyconDetached:
1174 case Settings::ControllerType::Handheld:
1175 case Settings::ControllerType::LeftJoycon:
1176 case Settings::ControllerType::RightJoycon:
1177 ui->buttonMiscButtonsPlusGroup->setTitle(tr("Plus"));
1178 ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("ZL"));
1179 ui->buttonShoulderButtonsZRGroup->setTitle(tr("ZR"));
1180 ui->buttonShoulderButtonsRGroup->setTitle(tr("R"));
1181 ui->LStick->setTitle(tr("Left Stick"));
1182 ui->RStick->setTitle(tr("Right Stick"));
1183 break;
1184 case Settings::ControllerType::GameCube:
1185 ui->buttonMiscButtonsPlusGroup->setTitle(tr("Start / Pause"));
1186 ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("L"));
1187 ui->buttonShoulderButtonsZRGroup->setTitle(tr("R"));
1188 ui->buttonShoulderButtonsRGroup->setTitle(tr("Z"));
1189 ui->LStick->setTitle(tr("Control Stick"));
1190 ui->RStick->setTitle(tr("C-Stick"));
1191 break;
1192 }
1193}
1194
1097void ConfigureInputPlayer::UpdateMappingWithDefaults() { 1195void ConfigureInputPlayer::UpdateMappingWithDefaults() {
1098 if (ui->comboDevices->currentIndex() == 0) { 1196 if (ui->comboDevices->currentIndex() == 0) {
1099 return; 1197 return;
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index da2b89136..efe953fbc 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -143,9 +143,15 @@ private:
143 /// Hides and disables controller settings based on the current controller type. 143 /// Hides and disables controller settings based on the current controller type.
144 void UpdateControllerAvailableButtons(); 144 void UpdateControllerAvailableButtons();
145 145
146 /// Disables controller settings based on the current controller type.
147 void UpdateControllerEnabledButtons();
148
146 /// Shows or hides motion groupboxes based on the current controller type. 149 /// Shows or hides motion groupboxes based on the current controller type.
147 void UpdateMotionButtons(); 150 void UpdateMotionButtons();
148 151
152 /// Alters the button names based on the current controller type.
153 void UpdateControllerButtonNames();
154
149 /// Gets the default controller mapping for this device and auto configures the input to match. 155 /// Gets the default controller mapping for this device and auto configures the input to match.
150 void UpdateMappingWithDefaults(); 156 void UpdateMappingWithDefaults();
151 157
diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp
index e77ccc057..61ba91cef 100644
--- a/src/yuzu/configuration/configure_input_player_widget.cpp
+++ b/src/yuzu/configuration/configure_input_player_widget.cpp
@@ -37,7 +37,8 @@ void PlayerControlPreview::SetPlayerInput(std::size_t index, const ButtonParam&
37 Input::CreateDevice<Input::AnalogDevice>); 37 Input::CreateDevice<Input::AnalogDevice>);
38 UpdateColors(); 38 UpdateColors();
39} 39}
40void PlayerControlPreview::SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_, 40void PlayerControlPreview::SetPlayerInputRaw(std::size_t index,
41 const Settings::ButtonsRaw& buttons_,
41 Settings::AnalogsRaw analogs_) { 42 Settings::AnalogsRaw analogs_) {
42 player_index = index; 43 player_index = index;
43 std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN, 44 std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN,
@@ -226,6 +227,9 @@ void PlayerControlPreview::paintEvent(QPaintEvent* event) {
226 case Settings::ControllerType::RightJoycon: 227 case Settings::ControllerType::RightJoycon:
227 DrawRightController(p, center); 228 DrawRightController(p, center);
228 break; 229 break;
230 case Settings::ControllerType::GameCube:
231 DrawGCController(p, center);
232 break;
229 case Settings::ControllerType::ProController: 233 case Settings::ControllerType::ProController:
230 default: 234 default:
231 DrawProController(p, center); 235 DrawProController(p, center);
@@ -517,14 +521,15 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center)
517 { 521 {
518 // Draw joysticks 522 // Draw joysticks
519 using namespace Settings::NativeAnalog; 523 using namespace Settings::NativeAnalog;
520 DrawJoystick(p, center + QPointF(-65, -65) + (axis_values[LStick].value * 7), 1.62f, 524 const auto& l_stick = axis_values[LStick];
521 button_values[Settings::NativeButton::LStick]); 525 const auto l_button = button_values[Settings::NativeButton::LStick];
522 DrawJoystick(p, center + QPointF(65, 12) + (axis_values[RStick].value * 7), 1.62f, 526 const auto& r_stick = axis_values[RStick];
523 button_values[Settings::NativeButton::RStick]); 527 const auto r_button = button_values[Settings::NativeButton::RStick];
524 DrawRawJoystick(p, center + QPointF(-180, 90), axis_values[LStick].raw_value, 528
525 axis_values[LStick].properties); 529 DrawJoystick(p, center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button);
526 DrawRawJoystick(p, center + QPointF(180, 90), axis_values[RStick].raw_value, 530 DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button);
527 axis_values[RStick].properties); 531 DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties);
532 DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties);
528 } 533 }
529 534
530 using namespace Settings::NativeButton; 535 using namespace Settings::NativeButton;
@@ -603,14 +608,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen
603 { 608 {
604 // Draw joysticks 609 // Draw joysticks
605 using namespace Settings::NativeAnalog; 610 using namespace Settings::NativeAnalog;
606 DrawJoystick(p, center + QPointF(-171, -41) + (axis_values[LStick].value * 4), 1.0f, 611 const auto& l_stick = axis_values[LStick];
607 button_values[Settings::NativeButton::LStick]); 612 const auto l_button = button_values[Settings::NativeButton::LStick];
608 DrawJoystick(p, center + QPointF(171, 8) + (axis_values[RStick].value * 4), 1.0f, 613 const auto& r_stick = axis_values[RStick];
609 button_values[Settings::NativeButton::RStick]); 614 const auto r_button = button_values[Settings::NativeButton::RStick];
610 DrawRawJoystick(p, center + QPointF(-50, 0), axis_values[LStick].raw_value, 615
611 axis_values[LStick].properties); 616 DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button);
612 DrawRawJoystick(p, center + QPointF(50, 0), axis_values[RStick].raw_value, 617 DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button);
613 axis_values[RStick].properties); 618 DrawRawJoystick(p, center + QPointF(-50, 0), l_stick.raw_value, l_stick.properties);
619 DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties);
614 } 620 }
615 621
616 using namespace Settings::NativeButton; 622 using namespace Settings::NativeButton;
@@ -1002,12 +1008,6 @@ constexpr std::array<float, 3 * 2> up_arrow_symbol = {
1002 0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f, 1008 0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f,
1003}; 1009};
1004 1010
1005constexpr std::array<float, 13 * 2> up_arrow = {
1006 9.4f, -9.8f, 9.4f, -10.2f, 8.9f, -29.8f, 8.5f, -30.0f, 8.1f,
1007 -30.1f, 7.7f, -30.1f, -8.6f, -30.0f, -9.0f, -29.8f, -9.3f, -29.5f,
1008 -9.5f, -29.1f, -9.5f, -28.7f, -9.1f, -9.1f, -8.8f, -8.8f,
1009};
1010
1011constexpr std::array<float, 64 * 2> trigger_button = { 1011constexpr std::array<float, 64 * 2> trigger_button = {
1012 5.5f, -12.6f, 5.8f, -12.6f, 6.7f, -12.5f, 8.1f, -12.3f, 8.6f, -12.2f, 9.2f, -12.0f, 1012 5.5f, -12.6f, 5.8f, -12.6f, 6.7f, -12.5f, 8.1f, -12.3f, 8.6f, -12.2f, 9.2f, -12.0f,
1013 9.5f, -11.9f, 9.9f, -11.8f, 10.6f, -11.5f, 11.0f, -11.3f, 11.2f, -11.2f, 11.4f, -11.1f, 1013 9.5f, -11.9f, 9.9f, -11.8f, 10.6f, -11.5f, 11.0f, -11.3f, 11.2f, -11.2f, 11.4f, -11.1f,
@@ -1457,15 +1457,18 @@ void PlayerControlPreview::DrawProBody(QPainter& p, const QPointF center) {
1457 constexpr int radius1 = 32; 1457 constexpr int radius1 = 32;
1458 1458
1459 for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) { 1459 for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) {
1460 qleft_handle[point] = 1460 const float left_x = pro_left_handle[point * 2 + 0];
1461 center + QPointF(pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]); 1461 const float left_y = pro_left_handle[point * 2 + 1];
1462 qright_handle[point] = 1462
1463 center + QPointF(-pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]); 1463 qleft_handle[point] = center + QPointF(left_x, left_y);
1464 qright_handle[point] = center + QPointF(-left_x, left_y);
1464 } 1465 }
1465 for (std::size_t point = 0; point < pro_body.size() / 2; ++point) { 1466 for (std::size_t point = 0; point < pro_body.size() / 2; ++point) {
1466 qbody[point] = center + QPointF(pro_body[point * 2], pro_body[point * 2 + 1]); 1467 const float body_x = pro_body[point * 2 + 0];
1467 qbody[pro_body.size() - 1 - point] = 1468 const float body_y = pro_body[point * 2 + 1];
1468 center + QPointF(-pro_body[point * 2], pro_body[point * 2 + 1]); 1469
1470 qbody[point] = center + QPointF(body_x, body_y);
1471 qbody[pro_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
1469 } 1472 }
1470 1473
1471 // Draw left handle body 1474 // Draw left handle body
@@ -1496,21 +1499,25 @@ void PlayerControlPreview::DrawGCBody(QPainter& p, const QPointF center) {
1496 constexpr float angle = 2 * 3.1415f / 8; 1499 constexpr float angle = 2 * 3.1415f / 8;
1497 1500
1498 for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) { 1501 for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) {
1499 qleft_handle[point] = 1502 const float body_x = gc_left_body[point * 2 + 0];
1500 center + QPointF(gc_left_body[point * 2], gc_left_body[point * 2 + 1]); 1503 const float body_y = gc_left_body[point * 2 + 1];
1501 qright_handle[point] = 1504
1502 center + QPointF(-gc_left_body[point * 2], gc_left_body[point * 2 + 1]); 1505 qleft_handle[point] = center + QPointF(body_x, body_y);
1506 qright_handle[point] = center + QPointF(-body_x, body_y);
1503 } 1507 }
1504 for (std::size_t point = 0; point < gc_body.size() / 2; ++point) { 1508 for (std::size_t point = 0; point < gc_body.size() / 2; ++point) {
1505 qbody[point] = center + QPointF(gc_body[point * 2], gc_body[point * 2 + 1]); 1509 const float body_x = gc_body[point * 2 + 0];
1506 qbody[gc_body.size() - 1 - point] = 1510 const float body_y = gc_body[point * 2 + 1];
1507 center + QPointF(-gc_body[point * 2], gc_body[point * 2 + 1]); 1511
1512 qbody[point] = center + QPointF(body_x, body_y);
1513 qbody[gc_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
1508 } 1514 }
1509 for (std::size_t point = 0; point < 8; ++point) { 1515 for (std::size_t point = 0; point < 8; ++point) {
1510 left_hex[point] = 1516 const float point_cos = std::cos(point * angle);
1511 center + QPointF(34 * std::cos(point * angle) - 111, 34 * std::sin(point * angle) - 44); 1517 const float point_sin = std::sin(point * angle);
1512 right_hex[point] = 1518
1513 center + QPointF(26 * std::cos(point * angle) + 61, 26 * std::sin(point * angle) + 37); 1519 left_hex[point] = center + QPointF(34 * point_cos - 111, 34 * point_sin - 44);
1520 right_hex[point] = center + QPointF(26 * point_cos + 61, 26 * point_sin + 37);
1514 } 1521 }
1515 1522
1516 // Draw body 1523 // Draw body
@@ -1631,32 +1638,36 @@ void PlayerControlPreview::DrawDualBody(QPainter& p, const QPointF center) {
1631 constexpr float offset = 209.3f; 1638 constexpr float offset = 209.3f;
1632 1639
1633 for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) { 1640 for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) {
1634 left_joycon[point] = center + QPointF(left_joycon_body[point * 2] * size + offset, 1641 const float body_x = left_joycon_body[point * 2 + 0];
1635 left_joycon_body[point * 2 + 1] * size - 1); 1642 const float body_y = left_joycon_body[point * 2 + 1];
1636 right_joycon[point] = center + QPointF(-left_joycon_body[point * 2] * size - offset, 1643
1637 left_joycon_body[point * 2 + 1] * size - 1); 1644 left_joycon[point] = center + QPointF(body_x * size + offset, body_y * size - 1);
1645 right_joycon[point] = center + QPointF(-body_x * size - offset, body_y * size - 1);
1638 } 1646 }
1639 for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) { 1647 for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) {
1640 qleft_joycon_slider[point] = 1648 const float slider_x = left_joycon_slider[point * 2 + 0];
1641 center + QPointF(left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]); 1649 const float slider_y = left_joycon_slider[point * 2 + 1];
1642 qright_joycon_slider[point] = 1650
1643 center + QPointF(-left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]); 1651 qleft_joycon_slider[point] = center + QPointF(slider_x, slider_y);
1652 qright_joycon_slider[point] = center + QPointF(-slider_x, slider_y);
1644 } 1653 }
1645 for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) { 1654 for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) {
1655 const float top_view_x = left_joycon_topview[point * 2 + 0];
1656 const float top_view_y = left_joycon_topview[point * 2 + 1];
1657
1646 qleft_joycon_topview[point] = 1658 qleft_joycon_topview[point] =
1647 center + QPointF(left_joycon_topview[point * 2] * size2 - 52, 1659 center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
1648 left_joycon_topview[point * 2 + 1] * size2 - 52);
1649 qright_joycon_topview[point] = 1660 qright_joycon_topview[point] =
1650 center + QPointF(-left_joycon_topview[point * 2] * size2 + 52, 1661 center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
1651 left_joycon_topview[point * 2 + 1] * size2 - 52);
1652 } 1662 }
1653 for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) { 1663 for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) {
1664 const float top_view_x = left_joycon_slider_topview[point * 2 + 0];
1665 const float top_view_y = left_joycon_slider_topview[point * 2 + 1];
1666
1654 qleft_joycon_slider_topview[point] = 1667 qleft_joycon_slider_topview[point] =
1655 center + QPointF(left_joycon_slider_topview[point * 2] * size2 - 52, 1668 center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
1656 left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
1657 qright_joycon_slider_topview[point] = 1669 qright_joycon_slider_topview[point] =
1658 center + QPointF(-left_joycon_slider_topview[point * 2] * size2 + 52, 1670 center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
1659 left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
1660 } 1671 }
1661 1672
1662 // right joycon body 1673 // right joycon body
@@ -1905,18 +1916,19 @@ void PlayerControlPreview::DrawProTriggers(QPainter& p, const QPointF center, bo
1905 std::array<QPointF, pro_body_top.size()> qbody_top; 1916 std::array<QPointF, pro_body_top.size()> qbody_top;
1906 1917
1907 for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) { 1918 for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) {
1908 qleft_trigger[point] = 1919 const float trigger_x = pro_left_trigger[point * 2 + 0];
1909 center + QPointF(pro_left_trigger[point * 2], 1920 const float trigger_y = pro_left_trigger[point * 2 + 1];
1910 pro_left_trigger[point * 2 + 1] + (left_pressed ? 2 : 0)); 1921
1911 qright_trigger[point] = 1922 qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 2 : 0));
1912 center + QPointF(-pro_left_trigger[point * 2], 1923 qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 2 : 0));
1913 pro_left_trigger[point * 2 + 1] + (right_pressed ? 2 : 0));
1914 } 1924 }
1915 1925
1916 for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) { 1926 for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) {
1917 qbody_top[pro_body_top.size() - 1 - point] = 1927 const float top_x = pro_body_top[point * 2 + 0];
1918 center + QPointF(-pro_body_top[point * 2], pro_body_top[point * 2 + 1]); 1928 const float top_y = pro_body_top[point * 2 + 1];
1919 qbody_top[point] = center + QPointF(pro_body_top[point * 2], pro_body_top[point * 2 + 1]); 1929
1930 qbody_top[pro_body_top.size() - 1 - point] = center + QPointF(-top_x, top_y);
1931 qbody_top[point] = center + QPointF(top_x, top_y);
1920 } 1932 }
1921 1933
1922 // Pro body detail 1934 // Pro body detail
@@ -1939,12 +1951,11 @@ void PlayerControlPreview::DrawGCTriggers(QPainter& p, const QPointF center, boo
1939 std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger; 1951 std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger;
1940 1952
1941 for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) { 1953 for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) {
1942 qleft_trigger[point] = 1954 const float trigger_x = left_gc_trigger[point * 2 + 0];
1943 center + QPointF(left_gc_trigger[point * 2], 1955 const float trigger_y = left_gc_trigger[point * 2 + 1];
1944 left_gc_trigger[point * 2 + 1] + (left_pressed ? 10 : 0)); 1956
1945 qright_trigger[point] = 1957 qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 10 : 0));
1946 center + QPointF(-left_gc_trigger[point * 2], 1958 qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 10 : 0));
1947 left_gc_trigger[point * 2 + 1] + (right_pressed ? 10 : 0));
1948 } 1959 }
1949 1960
1950 // Left trigger 1961 // Left trigger
@@ -1973,12 +1984,13 @@ void PlayerControlPreview::DrawHandheldTriggers(QPainter& p, const QPointF cente
1973 std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger; 1984 std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger;
1974 1985
1975 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) { 1986 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
1987 const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
1988 const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
1989
1976 qleft_trigger[point] = 1990 qleft_trigger[point] =
1977 center + QPointF(left_joycon_trigger[point * 2], 1991 center + QPointF(left_trigger_x, left_trigger_y + (left_pressed ? 0.5f : 0));
1978 left_joycon_trigger[point * 2 + 1] + (left_pressed ? 0.5f : 0));
1979 qright_trigger[point] = 1992 qright_trigger[point] =
1980 center + QPointF(-left_joycon_trigger[point * 2], 1993 center + QPointF(-left_trigger_x, left_trigger_y + (right_pressed ? 0.5f : 0));
1981 left_joycon_trigger[point * 2 + 1] + (right_pressed ? 0.5f : 0));
1982 } 1994 }
1983 1995
1984 // Left trigger 1996 // Left trigger
@@ -1998,12 +2010,14 @@ void PlayerControlPreview::DrawDualTriggers(QPainter& p, const QPointF center, b
1998 constexpr float size = 1.62f; 2010 constexpr float size = 1.62f;
1999 constexpr float offset = 210.6f; 2011 constexpr float offset = 210.6f;
2000 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) { 2012 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
2001 qleft_trigger[point] = 2013 const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
2002 center + QPointF(left_joycon_trigger[point * 2] * size + offset, 2014 const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
2003 left_joycon_trigger[point * 2 + 1] * size + (left_pressed ? 0.5f : 0)); 2015
2004 qright_trigger[point] = center + QPointF(-left_joycon_trigger[point * 2] * size - offset, 2016 qleft_trigger[point] = center + QPointF(left_trigger_x * size + offset,
2005 left_joycon_trigger[point * 2 + 1] * size + 2017 left_trigger_y * size + (left_pressed ? 0.5f : 0));
2006 (right_pressed ? 0.5f : 0)); 2018 qright_trigger[point] =
2019 center + QPointF(-left_trigger_x * size - offset,
2020 left_trigger_y * size + (right_pressed ? 0.5f : 0));
2007 } 2021 }
2008 2022
2009 // Left trigger 2023 // Left trigger
@@ -2023,13 +2037,16 @@ void PlayerControlPreview::DrawDualTriggersTopView(QPainter& p, const QPointF ce
2023 constexpr float size = 0.9f; 2037 constexpr float size = 0.9f;
2024 2038
2025 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) { 2039 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
2026 qleft_trigger[point] = center + QPointF(left_joystick_L_topview[point * 2] * size - 50, 2040 const float top_view_x = left_joystick_L_topview[point * 2 + 0];
2027 left_joystick_L_topview[point * 2 + 1] * size - 52); 2041 const float top_view_y = left_joystick_L_topview[point * 2 + 1];
2042
2043 qleft_trigger[point] = center + QPointF(top_view_x * size - 50, top_view_y * size - 52);
2028 } 2044 }
2029 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) { 2045 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
2030 qright_trigger[point] = 2046 const float top_view_x = left_joystick_L_topview[point * 2 + 0];
2031 center + QPointF(-left_joystick_L_topview[point * 2] * size + 50, 2047 const float top_view_y = left_joystick_L_topview[point * 2 + 1];
2032 left_joystick_L_topview[point * 2 + 1] * size - 52); 2048
2049 qright_trigger[point] = center + QPointF(-top_view_x * size + 50, top_view_y * size - 52);
2033 } 2050 }
2034 2051
2035 p.setPen(colors.outline); 2052 p.setPen(colors.outline);
@@ -2323,7 +2340,7 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo
2323} 2340}
2324 2341
2325void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value, 2342void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value,
2326 const Input::AnalogProperties properties) { 2343 const Input::AnalogProperties& properties) {
2327 constexpr float size = 45.0f; 2344 constexpr float size = 45.0f;
2328 const float range = size * properties.range; 2345 const float range = size * properties.range;
2329 const float deadzone = size * properties.deadzone; 2346 const float deadzone = size * properties.deadzone;
@@ -2446,17 +2463,16 @@ void PlayerControlPreview::DrawArrowButtonOutline(QPainter& p, const QPointF cen
2446 std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline; 2463 std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline;
2447 2464
2448 for (std::size_t point = 0; point < arrow_points - 1; ++point) { 2465 for (std::size_t point = 0; point < arrow_points - 1; ++point) {
2449 arrow_button_outline[point] = center + QPointF(up_arrow_button[point * 2] * size, 2466 const float up_arrow_x = up_arrow_button[point * 2 + 0];
2450 up_arrow_button[point * 2 + 1] * size); 2467 const float up_arrow_y = up_arrow_button[point * 2 + 1];
2468
2469 arrow_button_outline[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
2451 arrow_button_outline[(arrow_points - 1) * 2 - point - 1] = 2470 arrow_button_outline[(arrow_points - 1) * 2 - point - 1] =
2452 center + 2471 center + QPointF(up_arrow_y * size, up_arrow_x * size);
2453 QPointF(up_arrow_button[point * 2 + 1] * size, up_arrow_button[point * 2] * size);
2454 arrow_button_outline[(arrow_points - 1) * 2 + point] = 2472 arrow_button_outline[(arrow_points - 1) * 2 + point] =
2455 center + 2473 center + QPointF(-up_arrow_x * size, -up_arrow_y * size);
2456 QPointF(-up_arrow_button[point * 2] * size, -up_arrow_button[point * 2 + 1] * size);
2457 arrow_button_outline[(arrow_points - 1) * 4 - point - 1] = 2474 arrow_button_outline[(arrow_points - 1) * 4 - point - 1] =
2458 center + 2475 center + QPointF(-up_arrow_y * size, -up_arrow_x * size);
2459 QPointF(-up_arrow_button[point * 2 + 1] * size, -up_arrow_button[point * 2] * size);
2460 } 2476 }
2461 // Draw arrow button outline 2477 // Draw arrow button outline
2462 p.setPen(colors.outline); 2478 p.setPen(colors.outline);
@@ -2470,22 +2486,21 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
2470 QPoint offset; 2486 QPoint offset;
2471 2487
2472 for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) { 2488 for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) {
2489 const float up_arrow_x = up_arrow_button[point * 2 + 0];
2490 const float up_arrow_y = up_arrow_button[point * 2 + 1];
2491
2473 switch (direction) { 2492 switch (direction) {
2474 case Direction::Up: 2493 case Direction::Up:
2475 arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size, 2494 arrow_button[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
2476 up_arrow_button[point * 2 + 1] * size);
2477 break; 2495 break;
2478 case Direction::Left: 2496 case Direction::Left:
2479 arrow_button[point] = center + QPointF(up_arrow_button[point * 2 + 1] * size, 2497 arrow_button[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
2480 up_arrow_button[point * 2] * size);
2481 break; 2498 break;
2482 case Direction::Right: 2499 case Direction::Right:
2483 arrow_button[point] = center + QPointF(-up_arrow_button[point * 2 + 1] * size, 2500 arrow_button[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
2484 up_arrow_button[point * 2] * size);
2485 break; 2501 break;
2486 case Direction::Down: 2502 case Direction::Down:
2487 arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size, 2503 arrow_button[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
2488 -up_arrow_button[point * 2 + 1] * size);
2489 break; 2504 break;
2490 case Direction::None: 2505 case Direction::None:
2491 break; 2506 break;
@@ -2524,17 +2539,17 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
2524void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center, 2539void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center,
2525 const Direction direction, bool pressed) { 2540 const Direction direction, bool pressed) {
2526 std::array<QPointF, trigger_button.size() / 2> qtrigger_button; 2541 std::array<QPointF, trigger_button.size() / 2> qtrigger_button;
2527 QPoint offset;
2528 2542
2529 for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) { 2543 for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) {
2544 const float trigger_button_x = trigger_button[point * 2 + 0];
2545 const float trigger_button_y = trigger_button[point * 2 + 1];
2546
2530 switch (direction) { 2547 switch (direction) {
2531 case Direction::Left: 2548 case Direction::Left:
2532 qtrigger_button[point] = 2549 qtrigger_button[point] = center + QPointF(-trigger_button_x, trigger_button_y);
2533 center + QPointF(-trigger_button[point * 2], trigger_button[point * 2 + 1]);
2534 break; 2550 break;
2535 case Direction::Right: 2551 case Direction::Right:
2536 qtrigger_button[point] = 2552 qtrigger_button[point] = center + QPointF(trigger_button_x, trigger_button_y);
2537 center + QPointF(trigger_button[point * 2], trigger_button[point * 2 + 1]);
2538 break; 2553 break;
2539 case Direction::Up: 2554 case Direction::Up:
2540 case Direction::Down: 2555 case Direction::Down:
@@ -2657,22 +2672,21 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di
2657 std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol; 2672 std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol;
2658 2673
2659 for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) { 2674 for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) {
2675 const float up_arrow_x = up_arrow_symbol[point * 2 + 0];
2676 const float up_arrow_y = up_arrow_symbol[point * 2 + 1];
2677
2660 switch (direction) { 2678 switch (direction) {
2661 case Direction::Up: 2679 case Direction::Up:
2662 arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size, 2680 arrow_symbol[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
2663 up_arrow_symbol[point * 2 + 1] * size);
2664 break; 2681 break;
2665 case Direction::Left: 2682 case Direction::Left:
2666 arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2 + 1] * size, 2683 arrow_symbol[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
2667 up_arrow_symbol[point * 2] * size);
2668 break; 2684 break;
2669 case Direction::Right: 2685 case Direction::Right:
2670 arrow_symbol[point] = center + QPointF(-up_arrow_symbol[point * 2 + 1] * size, 2686 arrow_symbol[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
2671 up_arrow_symbol[point * 2] * size);
2672 break; 2687 break;
2673 case Direction::Down: 2688 case Direction::Down:
2674 arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size, 2689 arrow_symbol[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
2675 -up_arrow_symbol[point * 2 + 1] * size);
2676 break; 2690 break;
2677 case Direction::None: 2691 case Direction::None:
2678 break; 2692 break;
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h
index 676effbfd..91c3343f1 100644
--- a/src/yuzu/configuration/configure_input_player_widget.h
+++ b/src/yuzu/configuration/configure_input_player_widget.h
@@ -25,7 +25,7 @@ public:
25 25
26 void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param, 26 void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param,
27 const AnalogParam& analogs_param); 27 const AnalogParam& analogs_param);
28 void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_, 28 void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw& buttons_,
29 Settings::AnalogsRaw analogs_); 29 Settings::AnalogsRaw analogs_);
30 void SetConnectedStatus(bool checked); 30 void SetConnectedStatus(bool checked);
31 void SetControllerType(Settings::ControllerType type); 31 void SetControllerType(Settings::ControllerType type);
@@ -138,8 +138,8 @@ private:
138 // Draw joystick functions 138 // Draw joystick functions
139 void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed); 139 void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed);
140 void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed); 140 void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed);
141 void DrawRawJoystick(QPainter& p, QPointF center, const QPointF value, 141 void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
142 const Input::AnalogProperties properties); 142 const Input::AnalogProperties& properties);
143 void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed); 143 void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed);
144 void DrawGCJoystick(QPainter& p, QPointF center, bool pressed); 144 void DrawGCJoystick(QPainter& p, QPointF center, bool pressed);
145 145
diff --git a/src/yuzu/debugger/controller.cpp b/src/yuzu/debugger/controller.cpp
index 85724a8f3..2731d948d 100644
--- a/src/yuzu/debugger/controller.cpp
+++ b/src/yuzu/debugger/controller.cpp
@@ -42,7 +42,7 @@ void ControllerDialog::refreshConfiguration() {
42 42
43QAction* ControllerDialog::toggleViewAction() { 43QAction* ControllerDialog::toggleViewAction() {
44 if (toggle_view_action == nullptr) { 44 if (toggle_view_action == nullptr) {
45 toggle_view_action = new QAction(windowTitle(), this); 45 toggle_view_action = new QAction(tr("&Controller P1"), this);
46 toggle_view_action->setCheckable(true); 46 toggle_view_action->setCheckable(true);
47 toggle_view_action->setChecked(isVisible()); 47 toggle_view_action->setChecked(isVisible());
48 connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible); 48 connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ef92c25bc..0ba7c07cc 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -850,6 +850,16 @@ void GMainWindow::InitializeHotkeys() {
850 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this), 850 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
851 &QShortcut::activated, this, 851 &QShortcut::activated, this,
852 [] { Settings::values.audio_muted = !Settings::values.audio_muted; }); 852 [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
853
854 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
855 &QShortcut::activated, this, [&] {
856 Settings::values.mouse_panning = !Settings::values.mouse_panning;
857 if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
858 mouse_hide_timer.start();
859 render_window->installEventFilter(render_window);
860 render_window->setAttribute(Qt::WA_Hover, true);
861 }
862 });
853} 863}
854 864
855void GMainWindow::SetDefaultUIGeometry() { 865void GMainWindow::SetDefaultUIGeometry() {
@@ -1197,7 +1207,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
1197 multicore_status_button->setDisabled(true); 1207 multicore_status_button->setDisabled(true);
1198 renderer_status_button->setDisabled(true); 1208 renderer_status_button->setDisabled(true);
1199 1209
1200 if (UISettings::values.hide_mouse) { 1210 if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
1201 mouse_hide_timer.start(); 1211 mouse_hide_timer.start();
1202 render_window->installEventFilter(render_window); 1212 render_window->installEventFilter(render_window);
1203 render_window->setAttribute(Qt::WA_Hover, true); 1213 render_window->setAttribute(Qt::WA_Hover, true);
@@ -2359,7 +2369,7 @@ void GMainWindow::OnConfigure() {
2359 2369
2360 config->Save(); 2370 config->Save();
2361 2371
2362 if (UISettings::values.hide_mouse && emulation_running) { 2372 if ((UISettings::values.hide_mouse || Settings::values.mouse_panning) && emulation_running) {
2363 render_window->installEventFilter(render_window); 2373 render_window->installEventFilter(render_window);
2364 render_window->setAttribute(Qt::WA_Hover, true); 2374 render_window->setAttribute(Qt::WA_Hover, true);
2365 mouse_hide_timer.start(); 2375 mouse_hide_timer.start();
@@ -2480,6 +2490,11 @@ void GMainWindow::OnCaptureScreenshot() {
2480 .arg(title_id, 16, 16, QLatin1Char{'0'}) 2490 .arg(title_id, 16, 16, QLatin1Char{'0'})
2481 .arg(date); 2491 .arg(date);
2482 2492
2493 if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
2494 OnStartGame();
2495 return;
2496 }
2497
2483#ifdef _WIN32 2498#ifdef _WIN32
2484 if (UISettings::values.enable_screenshot_save_as) { 2499 if (UISettings::values.enable_screenshot_save_as) {
2485 filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename, 2500 filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
@@ -2600,7 +2615,8 @@ void GMainWindow::UpdateUISettings() {
2600} 2615}
2601 2616
2602void GMainWindow::HideMouseCursor() { 2617void GMainWindow::HideMouseCursor() {
2603 if (emu_thread == nullptr || UISettings::values.hide_mouse == false) { 2618 if (emu_thread == nullptr ||
2619 (!UISettings::values.hide_mouse && !Settings::values.mouse_panning)) {
2604 mouse_hide_timer.stop(); 2620 mouse_hide_timer.stop();
2605 ShowMouseCursor(); 2621 ShowMouseCursor();
2606 return; 2622 return;
@@ -2610,13 +2626,16 @@ void GMainWindow::HideMouseCursor() {
2610 2626
2611void GMainWindow::ShowMouseCursor() { 2627void GMainWindow::ShowMouseCursor() {
2612 render_window->unsetCursor(); 2628 render_window->unsetCursor();
2613 if (emu_thread != nullptr && UISettings::values.hide_mouse) { 2629 if (emu_thread != nullptr &&
2630 (UISettings::values.hide_mouse || Settings::values.mouse_panning)) {
2614 mouse_hide_timer.start(); 2631 mouse_hide_timer.start();
2615 } 2632 }
2616} 2633}
2617 2634
2618void GMainWindow::OnMouseActivity() { 2635void GMainWindow::OnMouseActivity() {
2619 ShowMouseCursor(); 2636 if (!Settings::values.mouse_panning) {
2637 ShowMouseCursor();
2638 }
2620} 2639}
2621 2640
2622void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) { 2641void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) {
@@ -2751,7 +2770,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
2751 .arg(errors)); 2770 .arg(errors));
2752 } 2771 }
2753 2772
2754 QProgressDialog prog; 2773 QProgressDialog prog(this);
2755 prog.setRange(0, 0); 2774 prog.setRange(0, 0);
2756 prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your " 2775 prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your "
2757 "system's performance.")); 2776 "system's performance."));
@@ -2933,7 +2952,7 @@ void GMainWindow::filterBarSetChecked(bool state) {
2933} 2952}
2934 2953
2935void GMainWindow::UpdateUITheme() { 2954void GMainWindow::UpdateUITheme() {
2936 const QString default_icons = QStringLiteral(":/icons/default"); 2955 const QString default_icons = QStringLiteral("default");
2937 const QString& current_theme = UISettings::values.theme; 2956 const QString& current_theme = UISettings::values.theme;
2938 const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second); 2957 const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second);
2939 QStringList theme_paths(default_theme_paths); 2958 QStringList theme_paths(default_theme_paths);
@@ -2949,7 +2968,6 @@ void GMainWindow::UpdateUITheme() {
2949 qApp->setStyleSheet({}); 2968 qApp->setStyleSheet({});
2950 setStyleSheet({}); 2969 setStyleSheet({});
2951 } 2970 }
2952 theme_paths.append(default_icons);
2953 QIcon::setThemeName(default_icons); 2971 QIcon::setThemeName(default_icons);
2954 } else { 2972 } else {
2955 const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss")); 2973 const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss"));
@@ -2961,10 +2979,7 @@ void GMainWindow::UpdateUITheme() {
2961 } else { 2979 } else {
2962 LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found"); 2980 LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found");
2963 } 2981 }
2964 2982 QIcon::setThemeName(current_theme);
2965 const QString theme_name = QStringLiteral(":/icons/") + current_theme;
2966 theme_paths.append({default_icons, theme_name});
2967 QIcon::setThemeName(theme_name);
2968 } 2983 }
2969 2984
2970 QIcon::setThemeSearchPaths(theme_paths); 2985 QIcon::setThemeSearchPaths(theme_paths);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index e2ad5baf6..048870687 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -14,8 +14,8 @@
14 <string>yuzu</string> 14 <string>yuzu</string>
15 </property> 15 </property>
16 <property name="windowIcon"> 16 <property name="windowIcon">
17 <iconset> 17 <iconset resource="yuzu.qrc">
18 <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset> 18 <normaloff>:/img/yuzu.ico</normaloff>:/img/yuzu.ico</iconset>
19 </property> 19 </property>
20 <property name="tabShape"> 20 <property name="tabShape">
21 <enum>QTabWidget::Rounded</enum> 21 <enum>QTabWidget::Rounded</enum>
@@ -303,6 +303,8 @@
303 </property> 303 </property>
304 </action> 304 </action>
305 </widget> 305 </widget>
306 <resources/> 306 <resources>
307 <include location="yuzu.qrc"/>
308 </resources>
307 <connections/> 309 <connections/>
308</ui> 310</ui>
diff --git a/src/yuzu/yuzu.qrc b/src/yuzu/yuzu.qrc
new file mode 100644
index 000000000..5733cac98
--- /dev/null
+++ b/src/yuzu/yuzu.qrc
@@ -0,0 +1,5 @@
1<RCC>
2 <qresource prefix="/img">
3 <file alias="yuzu.ico">../../dist/yuzu.ico</file>
4 </qresource>
5</RCC>
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 0b3f2cb54..8461f8896 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -1,5 +1,15 @@
1set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) 1set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
2 2
3function(create_resource file output filename)
4 # Read hex data from file
5 file(READ ${file} filedata HEX)
6 # Convert hex data for C compatibility
7 string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
8 # Write data to output file
9 set(RESOURCES_DIR "${PROJECT_BINARY_DIR}/dist" PARENT_SCOPE)
 10 file(WRITE "${PROJECT_BINARY_DIR}/dist/${output}" "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
11endfunction()
12
3add_executable(yuzu-cmd 13add_executable(yuzu-cmd
4 config.cpp 14 config.cpp
5 config.h 15 config.h
@@ -24,6 +34,9 @@ if (MSVC)
24endif() 34endif()
25target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads) 35target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
26 36
37create_resource("../../dist/yuzu.bmp" "yuzu_cmd/yuzu_icon.h" "yuzu_icon")
38target_include_directories(yuzu-cmd PRIVATE ${RESOURCES_DIR})
39
27target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include) 40target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
28 41
29if(UNIX AND NOT APPLE) 42if(UNIX AND NOT APPLE)
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f76102459..6d8bc5509 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -329,9 +329,6 @@ void Config::ReadValues() {
329 FS::GetUserPath( 329 FS::GetUserPath(
330 FS::UserPath::DumpDir, 330 FS::UserPath::DumpDir,
331 sdl2_config->Get("Data Storage", "dump_directory", FS::GetUserPath(FS::UserPath::DumpDir))); 331 sdl2_config->Get("Data Storage", "dump_directory", FS::GetUserPath(FS::UserPath::DumpDir)));
332 FS::GetUserPath(FS::UserPath::CacheDir,
333 sdl2_config->Get("Data Storage", "cache_directory",
334 FS::GetUserPath(FS::UserPath::CacheDir)));
335 Settings::values.gamecard_inserted = 332 Settings::values.gamecard_inserted =
336 sdl2_config->GetBoolean("Data Storage", "gamecard_inserted", false); 333 sdl2_config->GetBoolean("Data Storage", "gamecard_inserted", false);
337 Settings::values.gamecard_current_game = 334 Settings::values.gamecard_current_game =
@@ -388,7 +385,7 @@ void Config::ReadValues() {
388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100))); 385 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
389 Settings::values.use_disk_shader_cache.SetValue( 386 Settings::values.use_disk_shader_cache.SetValue(
390 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); 387 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
391 const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); 388 const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
392 Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); 389 Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
393 Settings::values.use_asynchronous_gpu_emulation.SetValue( 390 Settings::values.use_asynchronous_gpu_emulation.SetValue(
394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); 391 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7843d5167..7e391ab89 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -12,6 +12,7 @@
12#include "input_common/mouse/mouse_input.h" 12#include "input_common/mouse/mouse_input.h"
13#include "input_common/sdl/sdl.h" 13#include "input_common/sdl/sdl.h"
14#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 14#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
15#include "yuzu_cmd/yuzu_icon.h"
15 16
16EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_) 17EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_)
17 : input_subsystem{input_subsystem_} { 18 : input_subsystem{input_subsystem_} {
@@ -30,7 +31,8 @@ EmuWindow_SDL2::~EmuWindow_SDL2() {
30 31
31void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) { 32void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
32 TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0); 33 TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
33 input_subsystem->GetMouse()->MouseMove(x, y); 34
35 input_subsystem->GetMouse()->MouseMove(x, y, 0, 0);
34} 36}
35 37
36void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) { 38void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) {
@@ -193,6 +195,22 @@ void EmuWindow_SDL2::WaitEvent() {
193 } 195 }
194} 196}
195 197
198void EmuWindow_SDL2::SetWindowIcon() {
199 SDL_RWops* const yuzu_icon_stream = SDL_RWFromConstMem((void*)yuzu_icon, yuzu_icon_size);
200 if (yuzu_icon_stream == nullptr) {
201 LOG_WARNING(Frontend, "Failed to create yuzu icon stream.");
202 return;
203 }
204 SDL_Surface* const window_icon = SDL_LoadBMP_RW(yuzu_icon_stream, 1);
205 if (window_icon == nullptr) {
206 LOG_WARNING(Frontend, "Failed to read BMP from stream.");
207 return;
208 }
209 // The icon is attached to the window pointer
210 SDL_SetWindowIcon(render_window, window_icon);
211 SDL_FreeSurface(window_icon);
212}
213
196void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) { 214void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) {
197 SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second); 215 SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
198} 216}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index a93141240..51a12a6a9 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -32,6 +32,9 @@ public:
32 /// Wait for the next event on the main thread. 32 /// Wait for the next event on the main thread.
33 void WaitEvent(); 33 void WaitEvent();
34 34
35 // Sets the window icon from yuzu.bmp
36 void SetWindowIcon();
37
35protected: 38protected:
36 /// Called by WaitEvent when a key is pressed or released. 39 /// Called by WaitEvent when a key is pressed or released.
37 void OnKeyEvent(int key, u8 state); 40 void OnKeyEvent(int key, u8 state);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index deddea9ee..a02485c14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -107,6 +107,8 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste
107 dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0, 107 dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
108 SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL); 108 SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
109 109
110 SetWindowIcon();
111
110 if (fullscreen) { 112 if (fullscreen) {
111 Fullscreen(); 113 Fullscreen();
112 } 114 }
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 3ba657c00..6f9b00461 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -35,6 +35,8 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
35 std::exit(EXIT_FAILURE); 35 std::exit(EXIT_FAILURE);
36 } 36 }
37 37
38 SetWindowIcon();
39
38 switch (wm.subsystem) { 40 switch (wm.subsystem) {
39#ifdef SDL_VIDEO_DRIVER_WINDOWS 41#ifdef SDL_VIDEO_DRIVER_WINDOWS
40 case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS: 42 case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 0e1f3bdb3..982c41785 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
215 // Core is loaded, start the GPU (makes the GPU contexts current to this thread) 215 // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
216 system.GPU().Start(); 216 system.GPU().Start();
217 217
218 system.Renderer().Rasterizer().LoadDiskResources( 218 system.Renderer().ReadRasterizer()->LoadDiskResources(
219 system.CurrentProcess()->GetTitleID(), false, 219 system.CurrentProcess()->GetTitleID(), false,
220 [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); 220 [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
221 221