Diffstat (limited to 'src')
 src/audio_core/CMakeLists.txt | 2
 src/audio_core/command_generator.cpp | 357
 src/audio_core/command_generator.h | 5
 src/audio_core/common.h | 23
 src/audio_core/delay_line.cpp | 104
 src/audio_core/delay_line.h | 46
 src/audio_core/effect_context.cpp | 22
 src/audio_core/effect_context.h | 31
 src/audio_core/stream.cpp | 9
 src/common/CMakeLists.txt | 2
 src/common/string_util.cpp | 14
 src/core/CMakeLists.txt | 7
 src/core/hle/kernel/client_port.cpp | 4
 src/core/hle/kernel/client_session.cpp | 4
 src/core/hle/kernel/errors.h | 43
 src/core/hle/kernel/handle_table.cpp | 10
 src/core/hle/kernel/hle_ipc.cpp | 2
 src/core/hle/kernel/k_address_arbiter.cpp | 28
 src/core/hle/kernel/k_condition_variable.cpp | 20
 src/core/hle/kernel/k_readable_event.cpp | 3
 src/core/hle/kernel/k_resource_limit.cpp | 2
 src/core/hle/kernel/k_scoped_resource_reservation.h | 67
 src/core/hle/kernel/k_synchronization_object.cpp | 8
 src/core/hle/kernel/k_thread.cpp | 30
 src/core/hle/kernel/kernel.cpp | 17
 src/core/hle/kernel/memory/memory_manager.cpp | 6
 src/core/hle/kernel/memory/page_table.cpp | 85
 src/core/hle/kernel/process.cpp | 39
 src/core/hle/kernel/process_capability.cpp | 34
 src/core/hle/kernel/server_port.cpp | 4
 src/core/hle/kernel/session.cpp | 11
 src/core/hle/kernel/shared_memory.cpp | 11
 src/core/hle/kernel/svc.cpp | 244
 src/core/hle/kernel/svc_results.h | 21
 src/core/hle/kernel/transfer_memory.cpp | 2
 src/core/hle/service/am/am.cpp | 13
 src/core/hle/service/am/applets/controller.cpp | 3
 src/core/hle/service/am/applets/software_keyboard.cpp | 4
 src/core/hle/service/hid/hid.cpp | 1
 src/core/hle/service/ldn/errors.h | 13
 src/core/hle/service/ldn/ldn.cpp | 36
 src/core/hle/service/ldr/ldr.cpp | 6
 src/core/hle/service/nfp/nfp.cpp | 2
 src/core/hle/service/olsc/olsc.cpp | 13
 src/core/hle/service/sockets/bsd.cpp | 6
 src/core/settings.h | 5
 src/input_common/mouse/mouse_input.cpp | 32
 src/input_common/mouse/mouse_input.h | 7
 src/input_common/mouse/mouse_poller.cpp | 3
 src/input_common/udp/client.cpp | 4
 src/input_common/udp/client.h | 3
 src/input_common/udp/udp.cpp | 4
 src/tests/video_core/buffer_base.cpp | 76
 src/video_core/CMakeLists.txt | 17
 src/video_core/buffer_cache/buffer_base.h | 217
 src/video_core/buffer_cache/buffer_block.h | 62
 src/video_core/buffer_cache/buffer_cache.cpp | 13
 src/video_core/buffer_cache/buffer_cache.h | 1656
 src/video_core/buffer_cache/map_interval.cpp | 33
 src/video_core/buffer_cache/map_interval.h | 93
 src/video_core/command_classes/vic.cpp | 3
 src/video_core/dirty_flags.cpp | 29
 src/video_core/dirty_flags.h | 8
 src/video_core/dma_pusher.cpp | 2
 src/video_core/engines/fermi_2d.cpp | 4
 src/video_core/engines/fermi_2d.h | 2
 src/video_core/engines/kepler_compute.cpp | 5
 src/video_core/engines/kepler_compute.h | 2
 src/video_core/engines/kepler_memory.cpp | 1
 src/video_core/engines/maxwell_3d.cpp | 21
 src/video_core/engines/maxwell_3d.h | 14
 src/video_core/engines/maxwell_dma.cpp | 3
 src/video_core/fence_manager.h | 4
 src/video_core/gpu.cpp | 8
 src/video_core/gpu.h | 1
 src/video_core/gpu_thread.cpp | 12
 src/video_core/gpu_thread.h | 8
 src/video_core/host_shaders/CMakeLists.txt | 1
 src/video_core/host_shaders/vulkan_quad_array.comp | 28
 src/video_core/host_shaders/vulkan_uint8.comp | 9
 src/video_core/memory_manager.cpp | 4
 src/video_core/memory_manager.h | 4
 src/video_core/rasterizer_interface.h | 5
 src/video_core/renderer_base.h | 17
 src/video_core/renderer_opengl/gl_buffer_cache.cpp | 232
 src/video_core/renderer_opengl/gl_buffer_cache.h | 160
 src/video_core/renderer_opengl/gl_device.cpp | 13
 src/video_core/renderer_opengl/gl_device.h | 6
 src/video_core/renderer_opengl/gl_fence_manager.cpp | 2
 src/video_core/renderer_opengl/gl_fence_manager.h | 9
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 583
 src/video_core/renderer_opengl/gl_rasterizer.h | 66
 src/video_core/renderer_opengl/gl_resource_manager.cpp | 6
 src/video_core/renderer_opengl/gl_resource_manager.h | 3
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 61
 src/video_core/renderer_opengl/gl_shader_decompiler.h | 2
 src/video_core/renderer_opengl/gl_state_tracker.cpp | 25
 src/video_core/renderer_opengl/gl_state_tracker.h | 32
 src/video_core/renderer_opengl/gl_stream_buffer.cpp | 94
 src/video_core/renderer_opengl/gl_stream_buffer.h | 60
 src/video_core/renderer_opengl/gl_texture_cache.cpp | 69
 src/video_core/renderer_opengl/gl_texture_cache.h | 36
 src/video_core/renderer_opengl/maxwell_to_gl.h | 38
 src/video_core/renderer_opengl/renderer_opengl.cpp | 51
 src/video_core/renderer_opengl/renderer_opengl.h | 18
 src/video_core/renderer_opengl/util_shaders.cpp | 43
 src/video_core/renderer_opengl/util_shaders.h | 9
 src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 6
 src/video_core/renderer_vulkan/maxwell_to_vk.h | 2
 src/video_core/renderer_vulkan/renderer_vulkan.cpp | 153
 src/video_core/renderer_vulkan/renderer_vulkan.h | 46
 src/video_core/renderer_vulkan/vk_blit_screen.cpp | 20
 src/video_core/renderer_vulkan/vk_blit_screen.h | 15
 src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 394
 src/video_core/renderer_vulkan/vk_buffer_cache.h | 119
 src/video_core/renderer_vulkan/vk_compute_pass.cpp | 144
 src/video_core/renderer_vulkan/vk_compute_pass.h | 27
 src/video_core/renderer_vulkan/vk_fence_manager.cpp | 4
 src/video_core/renderer_vulkan/vk_fence_manager.h | 11
 src/video_core/renderer_vulkan/vk_master_semaphore.h | 11
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 664
 src/video_core/renderer_vulkan/vk_rasterizer.h | 64
 src/video_core/renderer_vulkan/vk_resource_pool.cpp | 14
 src/video_core/renderer_vulkan/vk_resource_pool.h | 2
 src/video_core/renderer_vulkan/vk_scheduler.cpp | 14
 src/video_core/renderer_vulkan/vk_scheduler.h | 26
 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 9
 src/video_core/renderer_vulkan/vk_shader_decompiler.h | 20
 src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 156
 src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 24
 src/video_core/renderer_vulkan/vk_state_tracker.cpp | 9
 src/video_core/renderer_vulkan/vk_swapchain.cpp | 7
 src/video_core/renderer_vulkan/vk_swapchain.h | 3
 src/video_core/renderer_vulkan/vk_texture_cache.cpp | 139
 src/video_core/renderer_vulkan/vk_texture_cache.h | 27
 src/video_core/shader/async_shaders.cpp | 1
 src/video_core/shader/async_shaders.h | 9
 src/video_core/shader/decode/other.cpp | 1
 src/video_core/shader/shader_ir.h | 5
 src/video_core/texture_cache/texture_cache.h | 58
 src/video_core/texture_cache/util.cpp | 34
 src/video_core/video_core.cpp | 19
 src/video_core/vulkan_common/vulkan_device.cpp | 213
 src/video_core/vulkan_common/vulkan_device.h | 7
 src/video_core/vulkan_common/vulkan_instance.cpp | 6
 src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 78
 src/video_core/vulkan_common/vulkan_memory_allocator.h | 18
 src/video_core/vulkan_common/vulkan_wrapper.cpp | 50
 src/video_core/vulkan_common/vulkan_wrapper.h | 37
 src/yuzu/CMakeLists.txt | 1
 src/yuzu/bootmanager.cpp | 16
 src/yuzu/configuration/config.cpp | 18
 src/yuzu/configuration/config.h | 2
 src/yuzu/configuration/configure_graphics.cpp | 22
 src/yuzu/configuration/configure_input_advanced.cpp | 5
 src/yuzu/configuration/configure_input_advanced.ui | 82
 src/yuzu/configuration/configure_input_player_widget.cpp | 279
 src/yuzu/configuration/configure_input_player_widget.h | 8
 src/yuzu/debugger/controller.cpp | 2
 src/yuzu/main.cpp | 39
 src/yuzu/main.ui | 8
 src/yuzu/yuzu.qrc | 5
 src/yuzu_cmd/CMakeLists.txt | 13
 src/yuzu_cmd/config.cpp | 2
 src/yuzu_cmd/emu_window/emu_window_sdl2.cpp | 20
 src/yuzu_cmd/emu_window/emu_window_sdl2.h | 3
 src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | 2
 src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | 2
 src/yuzu_cmd/yuzu.cpp | 2
 169 files changed, 4917 insertions(+), 3732 deletions(-)
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d1d177b51..a0ae07752 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -15,6 +15,8 @@ add_library(audio_core STATIC
     command_generator.cpp
     command_generator.h
     common.h
+    delay_line.cpp
+    delay_line.h
     effect_context.cpp
     effect_context.h
     info_updater.cpp
diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp
index 5b1065520..437cc5ccd 100644
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cmath>
+#include <numbers>
 #include "audio_core/algorithm/interpolate.h"
 #include "audio_core/command_generator.h"
 #include "audio_core/effect_context.h"
@@ -13,6 +15,20 @@ namespace AudioCore {
 namespace {
 constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00;
 constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL;
+using DelayLineTimes = std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>;
+
+constexpr DelayLineTimes FDN_MIN_DELAY_LINE_TIMES{5.0f, 6.0f, 13.0f, 14.0f};
+constexpr DelayLineTimes FDN_MAX_DELAY_LINE_TIMES{45.704f, 82.782f, 149.94f, 271.58f};
+constexpr DelayLineTimes DECAY0_MAX_DELAY_LINE_TIMES{17.0f, 13.0f, 9.0f, 7.0f};
+constexpr DelayLineTimes DECAY1_MAX_DELAY_LINE_TIMES{19.0f, 11.0f, 10.0f, 6.0f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_TAP_TIMES{
+    0.017136f, 0.059154f, 0.161733f, 0.390186f, 0.425262f, 0.455411f, 0.689737f,
+    0.745910f, 0.833844f, 0.859502f, 0.000000f, 0.075024f, 0.168788f, 0.299901f,
+    0.337443f, 0.371903f, 0.599011f, 0.716741f, 0.817859f, 0.851664f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
+    0.67096f, 0.61027f, 1.0f, 0.35680f, 0.68361f, 0.65978f, 0.51939f,
+    0.24712f, 0.45945f, 0.45021f, 0.64196f, 0.54879f, 0.92925f, 0.38270f,
+    0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f};
 
 template <std::size_t N>
 void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
@@ -65,6 +81,154 @@ s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
     }
 }
 
+float Pow10(float x) {
+    if (x >= 0.0f) {
+        return 1.0f;
+    } else if (x <= -5.3f) {
+        return 0.0f;
+    }
+    return std::pow(10.0f, x);
+}
+
+float SinD(float degrees) {
+    return std::sin(degrees * std::numbers::pi_v<float> / 180.0f);
+}
+
+float CosD(float degrees) {
+    return std::cos(degrees * std::numbers::pi_v<float> / 180.0f);
+}
+
+float ToFloat(s32 sample) {
+    return static_cast<float>(sample) / 65536.f;
+}
+
+s32 ToS32(float sample) {
+    constexpr auto min = -8388608.0f;
+    constexpr auto max = 8388607.f;
+    float rescaled_sample = sample * 65536.0f;
+    if (rescaled_sample < min) {
+        rescaled_sample = min;
+    }
+    if (rescaled_sample > max) {
+        rescaled_sample = max;
+    }
+    return static_cast<s32>(rescaled_sample);
+}
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_1CH{0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_2CH{0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
+                                                           1, 1, 1, 0, 0, 0, 0, 1, 1, 1};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_4CH{0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+                                                           1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+                                                           1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+template <std::size_t CHANNEL_COUNT>
+void ApplyReverbGeneric(I3dl2ReverbState& state,
+                        const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
+                        const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
+                        s32 sample_count) {
+
+    auto GetTapLookup = []() {
+        if constexpr (CHANNEL_COUNT == 1) {
+            return REVERB_TAP_INDEX_1CH;
+        } else if constexpr (CHANNEL_COUNT == 2) {
+            return REVERB_TAP_INDEX_2CH;
+        } else if constexpr (CHANNEL_COUNT == 4) {
+            return REVERB_TAP_INDEX_4CH;
+        } else if constexpr (CHANNEL_COUNT == 6) {
+            return REVERB_TAP_INDEX_6CH;
+        }
+    };
+
+    const auto& tap_index_lut = GetTapLookup();
+    for (s32 sample = 0; sample < sample_count; sample++) {
+        std::array<f32, CHANNEL_COUNT> out_samples{};
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fsamp{};
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> mixed{};
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> osamp{};
+
+        // Mix everything into a single sample
+        s32 temp_mixed_sample = 0;
+        for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+            temp_mixed_sample += input[i][sample];
+        }
+        const auto current_sample = ToFloat(temp_mixed_sample);
+        const auto early_tap = state.early_delay_line.TapOut(state.early_to_late_taps);
+
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_TAPS; i++) {
+            const auto tapped_samp =
+                state.early_delay_line.TapOut(state.early_tap_steps[i]) * EARLY_GAIN[i];
+            out_samples[tap_index_lut[i]] += tapped_samp;
+
+            if constexpr (CHANNEL_COUNT == 6) {
+                // handle lfe
+                out_samples[5] += tapped_samp;
+            }
+        }
+
+        state.lowpass_0 = current_sample * state.lowpass_2 + state.lowpass_0 * state.lowpass_1;
+        state.early_delay_line.Tick(state.lowpass_0);
+
+        for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+            out_samples[i] *= state.early_gain;
+        }
+
+        // Two-channel output seems to apply a late gain; we need to save this
+        f32 filter{};
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            filter = state.fdn_delay_line[i].GetOutputSample();
+            const auto computed = filter * state.lpf_coefficients[0][i] + state.shelf_filter[i];
+            state.shelf_filter[i] =
+                filter * state.lpf_coefficients[1][i] + computed * state.lpf_coefficients[2][i];
+            fsamp[i] = computed;
+        }
+
+        // Mixing matrix
+        mixed[0] = fsamp[1] + fsamp[2];
+        mixed[1] = -fsamp[0] - fsamp[3];
+        mixed[2] = fsamp[0] - fsamp[3];
+        mixed[3] = fsamp[1] - fsamp[2];
+
+        if constexpr (CHANNEL_COUNT == 2) {
+            for (auto& mix : mixed) {
+                mix *= (filter * state.late_gain);
+            }
+        }
+
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            const auto late = early_tap * state.late_gain;
+            osamp[i] = state.decay_delay_line0[i].Tick(late + mixed[i]);
+            osamp[i] = state.decay_delay_line1[i].Tick(osamp[i]);
+            state.fdn_delay_line[i].Tick(osamp[i]);
+        }
+
+        if constexpr (CHANNEL_COUNT == 1) {
+            output[0][sample] = ToS32(state.dry_gain * ToFloat(input[0][sample]) +
+                                      (out_samples[0] + osamp[0] + osamp[1]));
+        } else if constexpr (CHANNEL_COUNT == 2 || CHANNEL_COUNT == 4) {
+            for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+                output[i][sample] =
+                    ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+            }
+        } else if constexpr (CHANNEL_COUNT == 6) {
+            const auto temp_center = state.center_delay_line.Tick(0.5f * (osamp[2] - osamp[3]));
+            for (std::size_t i = 0; i < 4; i++) {
+                output[i][sample] =
+                    ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+            }
+            output[4][sample] =
+                ToS32(state.dry_gain * ToFloat(input[4][sample]) + (out_samples[4] + temp_center));
+            output[5][sample] =
+                ToS32(state.dry_gain * ToFloat(input[5][sample]) + (out_samples[5] + osamp[3]));
+        }
+    }
+}
+
 } // namespace
 
 CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_,
@@ -271,11 +435,10 @@ void CommandGenerator::GenerateBiquadFilterCommandForVoice(ServerVoiceInfo& voic
     }
 
     // Generate biquad filter
     // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
     //                             dsp_state.biquad_filter_state,
-    //                             mix_buffer_count + channel, mix_buffer_count +
-    //                             channel, worker_params.sample_count,
-    //                             voice_info.GetInParams().node_id);
+    //                             mix_buffer_count + channel, mix_buffer_count + channel,
+    //                             worker_params.sample_count, voice_info.GetInParams().node_id);
 }
 
@@ -376,21 +539,54 @@ void CommandGenerator::GenerateEffectCommand(ServerMixInfo& mix_info) {
 
 void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info,
                                                         bool enabled) {
-    if (!enabled) {
+    auto* reverb = dynamic_cast<EffectI3dl2Reverb*>(info);
+    const auto& params = reverb->GetParams();
+    auto& state = reverb->GetState();
+    const auto channel_count = params.channel_count;
+
+    if (channel_count != 1 && channel_count != 2 && channel_count != 4 && channel_count != 6) {
         return;
     }
-    const auto& params = dynamic_cast<EffectI3dl2Reverb*>(info)->GetParams();
-    const auto channel_count = params.channel_count;
+
+    std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
+    std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+
+    const auto status = params.status;
     for (s32 i = 0; i < channel_count; i++) {
-        // TODO(ogniK): Actually implement reverb
-        /*
-        if (params.input[i] != params.output[i]) {
-            const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
-            auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
-            ApplyMix<1>(output, input, 32768, worker_params.sample_count);
-        }*/
-        auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
-        std::memset(output, 0, worker_params.sample_count * sizeof(s32));
+        input[i] = GetMixBuffer(mix_buffer_offset + params.input[i]);
+        output[i] = GetMixBuffer(mix_buffer_offset + params.output[i]);
+    }
+
+    if (enabled) {
+        if (status == ParameterStatus::Initialized) {
+            InitializeI3dl2Reverb(reverb->GetParams(), state, info->GetWorkBuffer());
+        } else if (status == ParameterStatus::Updating) {
+            UpdateI3dl2Reverb(reverb->GetParams(), state, false);
+        }
+    }
+
+    if (enabled) {
+        switch (channel_count) {
+        case 1:
+            ApplyReverbGeneric<1>(state, input, output, worker_params.sample_count);
+            break;
+        case 2:
+            ApplyReverbGeneric<2>(state, input, output, worker_params.sample_count);
+            break;
+        case 4:
+            ApplyReverbGeneric<4>(state, input, output, worker_params.sample_count);
+            break;
+        case 6:
+            ApplyReverbGeneric<6>(state, input, output, worker_params.sample_count);
+            break;
+        }
+    } else {
+        for (s32 i = 0; i < channel_count; i++) {
+            // Only copy if the buffer input and output do not match!
+            if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
+                std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+            }
+        }
     }
 }
 
@@ -528,6 +724,133 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
     return sample_count;
 }
 
+void CommandGenerator::InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                                             std::vector<u8>& work_buffer) {
+    // Reset state
+    state.lowpass_0 = 0.0f;
+    state.lowpass_1 = 0.0f;
+    state.lowpass_2 = 0.0f;
+
+    state.early_delay_line.Reset();
+    state.early_tap_steps.fill(0);
+    state.early_gain = 0.0f;
+    state.late_gain = 0.0f;
+    state.early_to_late_taps = 0;
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        state.fdn_delay_line[i].Reset();
+        state.decay_delay_line0[i].Reset();
+        state.decay_delay_line1[i].Reset();
+    }
+    state.last_reverb_echo = 0.0f;
+    state.center_delay_line.Reset();
+    for (auto& coef : state.lpf_coefficients) {
+        coef.fill(0.0f);
+    }
+    state.shelf_filter.fill(0.0f);
+    state.dry_gain = 0.0f;
+
+    const auto sample_rate = info.sample_rate / 1000;
+    f32* work_buffer_ptr = reinterpret_cast<f32*>(work_buffer.data());
+
+    s32 delay_samples{};
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, FDN_MAX_DELAY_LINE_TIMES[i]);
+        state.fdn_delay_line[i].Initialize(delay_samples, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, DECAY0_MAX_DELAY_LINE_TIMES[i]);
+        state.decay_delay_line0[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, DECAY1_MAX_DELAY_LINE_TIMES[i]);
+        state.decay_delay_line1[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+    }
+    delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 5.0f);
+    state.center_delay_line.Initialize(delay_samples, work_buffer_ptr);
+    work_buffer_ptr += delay_samples + 1;
+
+    delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 400.0f);
+    state.early_delay_line.Initialize(delay_samples, work_buffer_ptr);
+
+    UpdateI3dl2Reverb(info, state, true);
+}
+
+void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                                         bool should_clear) {
+
+    state.dry_gain = info.dry_gain;
+    state.shelf_filter.fill(0.0f);
+    state.lowpass_0 = 0.0f;
+    state.early_gain = Pow10(std::min(info.room + info.reflection, 5000.0f) / 2000.0f);
+    state.late_gain = Pow10(std::min(info.room + info.reverb, 5000.0f) / 2000.0f);
+
+    const auto sample_rate = info.sample_rate / 1000;
+    const f32 hf_gain = Pow10(info.room_hf / 2000.0f);
+    if (hf_gain >= 1.0f) {
+        state.lowpass_2 = 1.0f;
+        state.lowpass_1 = 0.0f;
+    } else {
+        const auto a = 1.0f - hf_gain;
+        const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+                                                     static_cast<f32>(info.sample_rate)));
+        const auto c = std::sqrt(b * b - 4.0f * a * a);
+
+        state.lowpass_1 = (b - c) / (2.0f * a);
+        state.lowpass_2 = 1.0f - state.lowpass_1;
+    }
+    state.early_to_late_taps = AudioCommon::CalculateDelaySamples(
+        sample_rate, 1000.0f * (info.reflection_delay + info.reverb_delay));
+
+    state.last_reverb_echo = 0.6f * info.diffusion * 0.01f;
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        const auto length =
+            FDN_MIN_DELAY_LINE_TIMES[i] +
+            (info.density / 100.0f) * (FDN_MAX_DELAY_LINE_TIMES[i] - FDN_MIN_DELAY_LINE_TIMES[i]);
+        state.fdn_delay_line[i].SetDelay(AudioCommon::CalculateDelaySamples(sample_rate, length));
+
+        const auto delay_sample_counts = state.fdn_delay_line[i].GetDelay() +
+                                         state.decay_delay_line0[i].GetDelay() +
+                                         state.decay_delay_line1[i].GetDelay();
+
+        float a = (-60.0f * static_cast<f32>(delay_sample_counts)) /
+                  (info.decay_time * static_cast<f32>(info.sample_rate));
+        float b = a / info.hf_decay_ratio;
+        float c = CosD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)) /
+                  SinD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate));
+        float d = Pow10((b - a) / 40.0f);
+        float e = Pow10((b + a) / 40.0f) * 0.7071f;
+
+        state.lpf_coefficients[0][i] = e * ((d * c) + 1.0f) / (c + d);
+        state.lpf_coefficients[1][i] = e * (1.0f - (d * c)) / (c + d);
+        state.lpf_coefficients[2][i] = (c - d) / (c + d);
+
+        state.decay_delay_line0[i].SetCoefficient(state.last_reverb_echo);
+        state.decay_delay_line1[i].SetCoefficient(-0.9f * state.last_reverb_echo);
+    }
+
+    if (should_clear) {
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            state.fdn_delay_line[i].Clear();
+            state.decay_delay_line0[i].Clear();
+            state.decay_delay_line1[i].Clear();
+        }
+        state.early_delay_line.Clear();
+        state.center_delay_line.Clear();
+    }
+
+    const auto max_early_delay = state.early_delay_line.GetMaxDelay();
+    const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+    for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
+        const auto length = AudioCommon::CalculateDelaySamples(
+            sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
+        state.early_tap_steps[tap] = std::min(length, max_early_delay);
+    }
+}
+
 void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume,
                                                  s32 channel, s32 node_id) {
     const auto last = static_cast<s32>(last_volume * 32768.0f);
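For context on the conversions used throughout the reverb path above: mix samples are s32 values on a 16.16-style scale where 65536 represents 1.0, and ToS32 clamps back to the signed 24-bit range [-8388608, 8388607]. A minimal self-contained sketch of the same round trip (the main() harness is illustrative only, not part of the commit):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Same scale as the hunk above: 1.0f == 65536 in the s32 sample domain.
    float ToFloat(std::int32_t sample) {
        return static_cast<float>(sample) / 65536.0f;
    }

    // Rescale and clamp to the signed 24-bit range used by the mixer.
    std::int32_t ToS32(float sample) {
        return static_cast<std::int32_t>(
            std::clamp(sample * 65536.0f, -8388608.0f, 8388607.0f));
    }

    int main() {
        std::printf("%f\n", ToFloat(32768)); // prints 0.5
        std::printf("%d\n", ToS32(1000.0f)); // clamped to 8388607
    }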
diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h
index b937350b1..2ebb755b0 100644
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -21,6 +21,8 @@ class ServerMixInfo;
 class EffectContext;
 class EffectBase;
 struct AuxInfoDSP;
+struct I3dl2ReverbParams;
+struct I3dl2ReverbState;
 using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>;
 
 class CommandGenerator {
@@ -80,6 +82,9 @@ private:
     s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
                       u32 sample_count, u32 read_offset, u32 read_count);
 
+    void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                               std::vector<u8>& work_buffer);
+    void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
     // DSP Code
     s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
                     s32 channel, std::size_t mix_offset);
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index ec59a3ba9..fe546c55d 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -33,6 +33,29 @@ constexpr std::size_t TEMP_MIX_BASE_SIZE = 0x3f00; // TODO(ogniK): Work out this
 // and our const ends up being 0x3f04, the 4 bytes are most
 // likely the sample history
 constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY;
+constexpr f32 I3DL2REVERB_MAX_LEVEL = 5000.0f;
+constexpr f32 I3DL2REVERB_MIN_REFLECTION_DURATION = 0.02f;
+constexpr std::size_t I3DL2REVERB_TAPS = 20;
+constexpr std::size_t I3DL2REVERB_DELAY_LINE_COUNT = 4;
+using Fractional = s32;
+
+template <typename T>
+constexpr Fractional ToFractional(T x) {
+    return static_cast<Fractional>(x * static_cast<T>(0x4000));
+}
+
+constexpr Fractional MultiplyFractional(Fractional lhs, Fractional rhs) {
+    return static_cast<Fractional>(static_cast<s64>(lhs) * rhs >> 14);
+}
+
+constexpr s32 FractionalToFixed(Fractional x) {
+    const auto s = x & (1 << 13);
+    return static_cast<s32>(x >> 14) + s;
+}
+
+constexpr s32 CalculateDelaySamples(s32 sample_rate_khz, float time) {
+    return FractionalToFixed(MultiplyFractional(ToFractional(sample_rate_khz), ToFractional(time)));
+}
 
 static constexpr u32 VersionFromRevision(u32_le rev) {
     // "REV7" -> 7
diff --git a/src/audio_core/delay_line.cpp b/src/audio_core/delay_line.cpp
new file mode 100644
index 000000000..f4e4dd8d2
--- /dev/null
+++ b/src/audio_core/delay_line.cpp
@@ -0,0 +1,104 @@
+#include <cstring>
+#include "audio_core/delay_line.h"
+
+namespace AudioCore {
+DelayLineBase::DelayLineBase() = default;
+DelayLineBase::~DelayLineBase() = default;
+
+void DelayLineBase::Initialize(s32 max_delay_, float* src_buffer) {
+    buffer = src_buffer;
+    buffer_end = buffer + max_delay_;
+    max_delay = max_delay_;
+    output = buffer;
+    SetDelay(max_delay_);
+    Clear();
+}
+
+void DelayLineBase::SetDelay(s32 new_delay) {
+    if (max_delay < new_delay) {
+        return;
+    }
+    delay = new_delay;
+    input = (buffer + ((output - buffer) + new_delay) % (max_delay + 1));
+}
+
+s32 DelayLineBase::GetDelay() const {
+    return delay;
+}
+
+s32 DelayLineBase::GetMaxDelay() const {
+    return max_delay;
+}
+
+f32 DelayLineBase::TapOut(s32 last_sample) {
+    const float* ptr = input - (last_sample + 1);
+    if (ptr < buffer) {
+        ptr += (max_delay + 1);
+    }
+
+    return *ptr;
+}
+
+f32 DelayLineBase::Tick(f32 sample) {
+    *(input++) = sample;
+    const auto out_sample = *(output++);
+
+    if (buffer_end < input) {
+        input = buffer;
+    }
+
+    if (buffer_end < output) {
+        output = buffer;
+    }
+
+    return out_sample;
+}
+
+float* DelayLineBase::GetInput() {
+    return input;
+}
+
+const float* DelayLineBase::GetInput() const {
+    return input;
+}
+
+f32 DelayLineBase::GetOutputSample() const {
+    return *output;
+}
+
+void DelayLineBase::Clear() {
+    std::memset(buffer, 0, sizeof(float) * max_delay);
+}
+
+void DelayLineBase::Reset() {
+    buffer = nullptr;
+    buffer_end = nullptr;
+    max_delay = 0;
+    input = nullptr;
+    output = nullptr;
+    delay = 0;
+}
+
+DelayLineAllPass::DelayLineAllPass() = default;
+DelayLineAllPass::~DelayLineAllPass() = default;
+
+void DelayLineAllPass::Initialize(u32 delay_, float coefficient_, f32* src_buffer) {
+    DelayLineBase::Initialize(delay_, src_buffer);
+    SetCoefficient(coefficient_);
+}
+
+void DelayLineAllPass::SetCoefficient(float coefficient_) {
+    coefficient = coefficient_;
+}
+
+f32 DelayLineAllPass::Tick(f32 sample) {
+    const auto temp = sample - coefficient * *output;
+    return coefficient * temp + DelayLineBase::Tick(temp);
+}
+
+void DelayLineAllPass::Reset() {
+    coefficient = 0.0f;
+    DelayLineBase::Reset();
+}
+
+} // namespace AudioCore
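DelayLineBase is a circular buffer over caller-owned storage: Initialize() points it at max_delay + 1 floats (which is why InitializeI3dl2Reverb advances its work-buffer pointer by delay_samples + 1 per line), and Tick() writes one input sample while returning the sample GetDelay() ticks in the past. A minimal usage sketch, assuming the class above:

    #include <vector>
    #include "audio_core/delay_line.h"

    void Demo() {
        AudioCore::DelayLineBase line;
        std::vector<float> storage(241, 0.0f); // max_delay + 1 slots
        line.Initialize(240, storage.data());  // 240 samples = 5 ms at 48 kHz
        line.SetDelay(120);                    // any delay up to max_delay

        for (int i = 0; i < 480; ++i) {
            // Returns zeros for the first 120 ticks, then the input
            // stream delayed by 120 samples.
            const float delayed = line.Tick(static_cast<float>(i));
            (void)delayed;
        }
    }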
diff --git a/src/audio_core/delay_line.h b/src/audio_core/delay_line.h
new file mode 100644
index 000000000..cafddd432
--- /dev/null
+++ b/src/audio_core/delay_line.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class DelayLineBase {
+public:
+    DelayLineBase();
+    ~DelayLineBase();
+
+    void Initialize(s32 max_delay_, float* src_buffer);
+    void SetDelay(s32 new_delay);
+    s32 GetDelay() const;
+    s32 GetMaxDelay() const;
+    f32 TapOut(s32 last_sample);
+    f32 Tick(f32 sample);
+    float* GetInput();
+    const float* GetInput() const;
+    f32 GetOutputSample() const;
+    void Clear();
+    void Reset();
+
+protected:
+    float* buffer{nullptr};
+    float* buffer_end{nullptr};
+    s32 max_delay{};
+    float* input{nullptr};
+    float* output{nullptr};
+    s32 delay{};
+};
+
+class DelayLineAllPass final : public DelayLineBase {
+public:
+    DelayLineAllPass();
+    ~DelayLineAllPass();
+
+    void Initialize(u32 delay, float coefficient_, f32* src_buffer);
+    void SetCoefficient(float coefficient_);
+    f32 Tick(f32 sample);
+    void Reset();
+
+private:
+    float coefficient{};
+};
+} // namespace AudioCore
diff --git a/src/audio_core/effect_context.cpp b/src/audio_core/effect_context.cpp
index f770b9608..89e4573c7 100644
--- a/src/audio_core/effect_context.cpp
+++ b/src/audio_core/effect_context.cpp
@@ -90,6 +90,14 @@ s32 EffectBase::GetProcessingOrder() const {
     return processing_order;
 }
 
+std::vector<u8>& EffectBase::GetWorkBuffer() {
+    return work_buffer;
+}
+
+const std::vector<u8>& EffectBase::GetWorkBuffer() const {
+    return work_buffer;
+}
+
 EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {}
 EffectI3dl2Reverb::~EffectI3dl2Reverb() = default;
 
@@ -117,6 +125,12 @@ void EffectI3dl2Reverb::Update(EffectInfo::InParams& in_params) {
         usage = UsageState::Initialized;
         params.status = ParameterStatus::Initialized;
         skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0;
+        if (!skipped) {
+            auto& cur_work_buffer = GetWorkBuffer();
+            // Has two buffers internally
+            cur_work_buffer.resize(in_params.buffer_size * 2);
+            std::fill(cur_work_buffer.begin(), cur_work_buffer.end(), 0);
+        }
     }
 }
 
@@ -129,6 +143,14 @@ void EffectI3dl2Reverb::UpdateForCommandGeneration() {
     GetParams().status = ParameterStatus::Updated;
 }
 
+I3dl2ReverbState& EffectI3dl2Reverb::GetState() {
+    return state;
+}
+
+const I3dl2ReverbState& EffectI3dl2Reverb::GetState() const {
+    return state;
+}
+
 EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {}
 EffectBiquadFilter::~EffectBiquadFilter() = default;
 
diff --git a/src/audio_core/effect_context.h b/src/audio_core/effect_context.h
index c5e0b398c..5e0655dd7 100644
--- a/src/audio_core/effect_context.h
+++ b/src/audio_core/effect_context.h
@@ -8,6 +8,7 @@
 #include <memory>
 #include <vector>
 #include "audio_core/common.h"
+#include "audio_core/delay_line.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
@@ -194,6 +195,8 @@ public:
     [[nodiscard]] bool IsEnabled() const;
     [[nodiscard]] s32 GetMixID() const;
     [[nodiscard]] s32 GetProcessingOrder() const;
+    [[nodiscard]] std::vector<u8>& GetWorkBuffer();
+    [[nodiscard]] const std::vector<u8>& GetWorkBuffer() const;
 
 protected:
     UsageState usage{UsageState::Invalid};
@@ -201,6 +204,7 @@ protected:
     s32 mix_id{};
     s32 processing_order{};
     bool enabled = false;
+    std::vector<u8> work_buffer{};
 };
 
 template <typename T>
@@ -212,7 +216,7 @@ public:
         return internal_params;
     }
 
-    const I3dl2ReverbParams& GetParams() const {
+    const T& GetParams() const {
         return internal_params;
     }
 
@@ -229,6 +233,27 @@ public:
     void UpdateForCommandGeneration() override;
 };
 
+struct I3dl2ReverbState {
+    f32 lowpass_0{};
+    f32 lowpass_1{};
+    f32 lowpass_2{};
+
+    DelayLineBase early_delay_line{};
+    std::array<u32, AudioCommon::I3DL2REVERB_TAPS> early_tap_steps{};
+    f32 early_gain{};
+    f32 late_gain{};
+
+    u32 early_to_late_taps{};
+    std::array<DelayLineBase, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fdn_delay_line{};
+    std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line0{};
+    std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line1{};
+    f32 last_reverb_echo{};
+    DelayLineBase center_delay_line{};
+    std::array<std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>, 3> lpf_coefficients{};
+    std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> shelf_filter{};
+    f32 dry_gain{};
+};
+
 class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> {
 public:
     explicit EffectI3dl2Reverb();
@@ -237,8 +262,12 @@ public:
     void Update(EffectInfo::InParams& in_params) override;
     void UpdateForCommandGeneration() override;
 
+    I3dl2ReverbState& GetState();
+    const I3dl2ReverbState& GetState() const;
+
 private:
     bool skipped = false;
+    I3dl2ReverbState state{};
 };
 
 class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> {
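The one-line GetParams() change above fixes a latent template bug: the const overload hard-coded I3dl2ReverbParams as its return type. Because member functions of a class template are only instantiated when used, that only broke once another instantiation actually called the const overload. A reduced sketch of the issue, with the surrounding class simplified:

    template <typename T>
    class EffectGeneric {
    public:
        T& GetParams() {
            return internal_params;
        }

        // Pre-fix this returned `const I3dl2ReverbParams&`; calling it on an
        // instantiation where T is some other params struct fails to compile,
        // since internal_params cannot bind to a reference of an unrelated type.
        const T& GetParams() const {
            return internal_params;
        }

    private:
        T internal_params{};
    };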
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 5b0b285cd..b0f6f0c34 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -111,7 +111,14 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) {
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer) - ns_late, release_event, {});
+    const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer);
+
+    // If ns_late is higher than the update rate, ignore the delay
+    if (ns_late > buffer_release_ns) {
+        ns_late = {};
+    }
+
+    core_timing.ScheduleEvent(buffer_release_ns - ns_late, release_event, {});
 }
 
 void Stream::ReleaseActiveBuffer(std::chrono::nanoseconds ns_late) {
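The stream change guards against scheduling the release event at a zero or negative delta when emulation falls far behind: if the measured lateness exceeds the buffer's own duration, it is dropped rather than subtracted. A minimal sketch of the same clamp, assuming only std::chrono:

    #include <chrono>

    std::chrono::nanoseconds NextDelay(std::chrono::nanoseconds buffer_release_ns,
                                       std::chrono::nanoseconds ns_late) {
        // Ignore the lateness entirely instead of producing a non-positive
        // schedule time.
        if (ns_late > buffer_release_ns) {
            ns_late = {};
        }
        return buffer_release_ns - ns_late;
    }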
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bfd11e76d..263c457cd 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -206,6 +206,8 @@ if (MSVC)
 else()
     target_compile_options(common PRIVATE
         -Werror
+
+        $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
     )
 endif()
 
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 4cba2aaa4..7b614ad89 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -141,27 +141,13 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
 }
 
 std::string UTF16ToUTF8(const std::u16string& input) {
-#ifdef _MSC_VER
-    // Workaround for missing char16_t/char32_t instantiations in MSVC2017
-    std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
-    std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend());
-    return convert.to_bytes(tmp_buffer);
-#else
     std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
     return convert.to_bytes(input);
-#endif
 }
 
 std::u16string UTF8ToUTF16(const std::string& input) {
-#ifdef _MSC_VER
-    // Workaround for missing char16_t/char32_t instantiations in MSVC2017
-    std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
-    auto tmp_buffer = convert.from_bytes(input);
-    return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend());
-#else
     std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
     return convert.from_bytes(input);
-#endif
 }
 
 #ifdef _WIN32
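With the MSVC2017 workaround removed, both conversions go through the standard std::wstring_convert / std::codecvt_utf8_utf16 pair (deprecated since C++17, but still available). A standalone usage sketch of the same API:

    #include <codecvt>
    #include <locale>
    #include <string>

    int main() {
        std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
        const std::string utf8 = convert.to_bytes(u"yuzu");    // UTF-16 -> UTF-8
        const std::u16string utf16 = convert.from_bytes(utf8); // UTF-8 -> UTF-16
        return utf16 == u"yuzu" ? 0 : 1;
    }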
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 386d7bddf..e74e6a668 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -148,7 +148,7 @@ add_library(core STATIC
     hle/kernel/client_session.h
     hle/kernel/code_set.cpp
     hle/kernel/code_set.h
-    hle/kernel/errors.h
+    hle/kernel/svc_results.h
     hle/kernel/global_scheduler_context.cpp
     hle/kernel/global_scheduler_context.h
     hle/kernel/handle_table.cpp
@@ -174,6 +174,7 @@ add_library(core STATIC
     hle/kernel/k_scheduler.h
     hle/kernel/k_scheduler_lock.h
     hle/kernel/k_scoped_lock.h
+    hle/kernel/k_scoped_resource_reservation.h
     hle/kernel/k_scoped_scheduler_lock_and_sleep.h
     hle/kernel/k_synchronization_object.cpp
     hle/kernel/k_synchronization_object.h
@@ -223,7 +224,6 @@ add_library(core STATIC
     hle/kernel/svc.cpp
     hle/kernel/svc.h
     hle/kernel/svc_common.h
-    hle/kernel/svc_results.h
     hle/kernel/svc_types.h
     hle/kernel/svc_wrap.h
     hle/kernel/time_manager.cpp
@@ -400,6 +400,7 @@ add_library(core STATIC
     hle/service/hid/controllers/xpad.h
     hle/service/lbl/lbl.cpp
     hle/service/lbl/lbl.h
+    hle/service/ldn/errors.h
     hle/service/ldn/ldn.cpp
     hle/service/ldn/ldn.h
     hle/service/ldr/ldr.cpp
@@ -653,6 +654,8 @@ else()
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
 
+        $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation>
+
         -Wno-sign-conversion
     )
 endif()
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index f8f005f15..0b6957e31 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -4,11 +4,11 @@
 
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 
@@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {
 
 ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
     if (active_sessions >= max_sessions) {
-        return ERR_MAX_CONNECTIONS_REACHED;
+        return ResultMaxConnectionsReached;
     }
     active_sessions++;
 
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index a2be1a8f6..e230f365a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -3,11 +3,11 @@
 // Refer to the license.txt file included.
 
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/result.h"
 
 namespace Kernel {
@@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread,
                                           Core::Timing::CoreTiming& core_timing) {
     // Keep ServerSession alive until we're done working with it.
     if (!parent->Server()) {
-        return ERR_SESSION_CLOSED_BY_REMOTE;
+        return ResultSessionClosedByRemote;
     }
 
     // Signal the server session that new data is available
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
deleted file mode 100644
index 7d32a39f0..000000000
--- a/src/core/hle/kernel/errors.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "core/hle/result.h"
-
-namespace Kernel {
-
-// Confirmed Switch kernel error codes
-
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
-constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
-constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
-constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
-constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
-constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
-constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
-constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
-constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112};
-constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114};
-constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
-constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
-constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
-constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
-constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
-constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
-constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122};
-constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123};
-constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125};
-constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126};
-constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132};
-
-} // namespace Kernel
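The deleted ERR_* constants move into svc_results.h under kernel-style Result* names. The exact contents of that file are not shown in this excerpt, but judging by the call sites updated throughout this diff, the replacements appear to keep the same error module and numbers. A hedged sketch of the renamed style, inferred from the codes above:

    // Inferred svc_results.h style; names taken from the call sites in this diff,
    // numbers from the deleted errors.h.
    constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
    constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
    constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
    constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
    constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
    constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};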
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 1a2fa9cd8..f96d34078 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -6,12 +6,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 namespace {
@@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default;
 ResultCode HandleTable::SetSize(s32 handle_table_size) {
     if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
         LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     // Values less than or equal to zero indicate to use the maximum allowable
@@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) {
     const u16 slot = next_free_slot;
     if (slot >= table_size) {
         LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
-        return ERR_HANDLE_TABLE_FULL;
+        return ResultHandleTableFull;
     }
     next_free_slot = generations[slot];
 
@@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
     std::shared_ptr<Object> object = GetGeneric(handle);
     if (object == nullptr) {
         LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }
     return Create(std::move(object));
 }
@@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 ResultCode HandleTable::Close(Handle handle) {
     if (!IsValid(handle)) {
         LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
     }
 
     const u16 slot = GetSlot(handle);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7ec62cf18..161d9f782 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,7 +14,6 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_readable_event.h"
@@ -26,6 +25,7 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/memory.h"
 
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index d0e90fd60..7018f56da 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
     s32 user_value{};
     if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) {
         LOG_ERROR(Kernel, "Invalid current memory!");
-        return Svc::ResultInvalidCurrentMemory;
+        return ResultInvalidCurrentMemory;
     }
     if (user_value != value) {
-        return Svc::ResultInvalidState;
+        return ResultInvalidState;
     }
 
     auto it = thread_tree.nfind_light({addr, -1});
@@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
 
     if (!succeeded) {
         LOG_ERROR(Kernel, "Invalid current memory!");
-        return Svc::ResultInvalidCurrentMemory;
+        return ResultInvalidCurrentMemory;
     }
     if (user_value != value) {
-        return Svc::ResultInvalidState;
+        return ResultInvalidState;
    }
 
     while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
@@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
         // Check that the thread isn't terminating.
         if (cur_thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
 
         // Read the value from userspace.
         s32 user_value{};
@@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
 
         if (!succeeded) {
             slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
 
         // Check that the value is less than the specified one.
         if (user_value >= value) {
             slp.CancelSleep();
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
         }
 
         // Check that the timeout is non-zero.
         if (timeout == 0) {
             slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
         }
 
         // Set the arbiter.
@@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
         // Check that the thread isn't terminating.
         if (cur_thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
 
         // Read the value from userspace.
         s32 user_value{};
         if (!ReadFromUser(system, &user_value, addr)) {
             slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
        }
 
         // Check that the value is equal.
         if (value != user_value) {
             slp.CancelSleep();
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
         }
 
         // Check that the timeout is non-zero.
         if (timeout == 0) {
             slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
         }
 
         // Set the arbiter.
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index f0ad8b390..170d8fa0d 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
         // Write the value to userspace.
         if (!WriteToUser(system, addr, std::addressof(next_value))) {
             if (next_owner_thread) {
-                next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+                next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
             }
 
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
     }
 
@@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val
             cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);
 
             // Check if the thread should terminate.
-            R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+            R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested);
 
             {
                 // Read the tag from userspace.
                 u32 test_tag{};
                 R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
-                         Svc::ResultInvalidCurrentMemory);
+                         ResultInvalidCurrentMemory);
 
                 // If the tag isn't the handle (with wait mask), we're done.
                 R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);
 
                 // Get the lock owner thread.
                 owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle);
-                R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+                R_UNLESS(owner_thread, ResultInvalidHandle);
 
                 // Update the lock.
                 cur_thread->SetAddressKey(addr, value);
@@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) {
                 thread_to_close = owner_thread.get();
             } else {
                 // The lock was tagged with a thread that doesn't exist.
-                thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+                thread->SetSyncedObject(nullptr, ResultInvalidState);
                 thread->Wakeup();
             }
         }
     } else {
         // If the address wasn't accessible, note so.
-        thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+        thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
         thread->Wakeup();
     }
 
@@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
         KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout};
 
         // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);
 
         // Check that the thread isn't terminating.
         if (cur_thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Update the value and process for the next owner.
@@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
         // Write the value to userspace.
         if (!WriteToUser(system, addr, std::addressof(next_value))) {
             slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
         }
     }
 
diff --git a/src/core/hle/kernel/k_readable_event.cpp b/src/core/hle/kernel/k_readable_event.cpp
index d8a42dbaf..4b4d34857 100644
--- a/src/core/hle/kernel/k_readable_event.cpp
+++ b/src/core/hle/kernel/k_readable_event.cpp
@@ -6,7 +6,6 @@
 #include "common/assert.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
@@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() {
     KScopedSchedulerLock lk{kernel};
 
     if (!is_signaled) {
-        return Svc::ResultInvalidState;
+        return ResultInvalidState;
     }
 
     is_signaled = false;
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index ab2ab683f..d7a4a38e6 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const {
 ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
     const auto index = static_cast<std::size_t>(which);
     KScopedLightLock lk(lock);
-    R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState);
+    R_UNLESS(current_values[index] <= value, ResultInvalidState);
 
     limit_values[index] = value;
 
diff --git a/src/core/hle/kernel/k_scoped_resource_reservation.h b/src/core/hle/kernel/k_scoped_resource_reservation.h
new file mode 100644
index 000000000..c5deca00b
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_resource_reservation.h
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+class KScopedResourceReservation {
+public:
+    explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+                                        s64 v, s64 timeout)
+        : resource_limit(std::move(l)), value(v), resource(r) {
+        if (resource_limit && value) {
+            success = resource_limit->Reserve(resource, value, timeout);
+        } else {
+            success = true;
+        }
+    }
+
+    explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+                                        s64 v = 1)
+        : resource_limit(std::move(l)), value(v), resource(r) {
+        if (resource_limit && value) {
+            success = resource_limit->Reserve(resource, value);
+        } else {
+            success = true;
+        }
+    }
+
+    explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t)
+        : KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {}
+
+    explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1)
+        : KScopedResourceReservation(p->GetResourceLimit(), r, v) {}
+
+    ~KScopedResourceReservation() noexcept {
+        if (resource_limit && value && success) {
+            // The resource was not committed; release the reservation.
+            resource_limit->Release(resource, value);
+        }
+    }
+
+    /// Commit the resource reservation; destruction of this object will not release the resource.
+    void Commit() {
+        resource_limit = nullptr;
+    }
+
+    [[nodiscard]] bool Succeeded() const {
+        return success;
+    }
+
+private:
+    std::shared_ptr<KResourceLimit> resource_limit;
+    s64 value;
+    LimitableResource resource;
+    bool success;
+};
+
+} // namespace Kernel
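
For orientation, here is a minimal usage sketch of the KScopedResourceReservation class added above. It is not part of the patch: the function name and the choice of LimitableResource::Events are illustrative assumptions, while Reserve, Succeeded, Commit, and the releasing destructor behave as declared in the header.

// Hypothetical caller; only KScopedResourceReservation's API is taken from the header above.
#include "core/hle/kernel/k_scoped_resource_reservation.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/result.h"

namespace Kernel {

ResultCode CreateEventExample(KernelCore& kernel) {
    // Reserve one Events slot. If the limit is exhausted, Succeeded() is false
    // and there is nothing to undo.
    KScopedResourceReservation event_reservation(kernel.GetSystemResourceLimit(),
                                                 LimitableResource::Events);
    if (!event_reservation.Succeeded()) {
        return ResultResourceLimitedExceeded;
    }

    // ... construct the object; any early return here releases the reservation
    // automatically via the destructor ...

    // Construction succeeded: commit, so the destructor no longer releases the slot.
    event_reservation.Commit();
    return RESULT_SUCCESS;
}

} // namespace Kernel
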
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index 140cc46a7..82f72a0fe 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
         // Check if the timeout is zero.
         if (timeout == 0) {
             slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
         }
 
         // Check if the thread should terminate.
         if (thread->IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Check if waiting was canceled.
         if (thread->IsWaitCancelled()) {
             slp.CancelSleep();
             thread->ClearWaitCancelled();
-            return Svc::ResultCancelled;
+            return ResultCancelled;
         }
 
         // Add the waiters.
@@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
 
         // Mark the thread as waiting.
         thread->SetCancellable();
-        thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        thread->SetSyncedObject(nullptr, ResultTimedOut);
         thread->SetState(ThreadState::Waiting);
         thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
     }
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index b59259c4f..e5620da5a 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -18,7 +18,6 @@
 #include "core/core.h"
 #include "core/cpu_manager.h"
 #include "core/hardware_properties.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_resource_limit.h"
@@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
 
     // Set core ID and wait result.
     core_id = phys_core;
-    wait_result = Svc::ResultNoSynchronizationObject;
+    wait_result = ResultNoSynchronizationObject;
 
     // Set priorities.
     priority = prio;
@@ -238,7 +237,7 @@ void KThread::Finalize() {
         while (it != waiter_list.end()) {
             // The thread shouldn't be a kernel waiter.
             it->SetLockOwner(nullptr);
-            it->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+            it->SetSyncedObject(nullptr, ResultInvalidState);
             it->Wakeup();
             it = waiter_list.erase(it);
         }
@@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
         // If the core id is no-update magic, preserve the ideal core id.
         if (core_id == Svc::IdealCoreNoUpdate) {
             core_id = virtual_ideal_core_id;
-            R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination);
+            R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination);
         }
 
         // Set the virtual core/affinity mask.
@@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
             if (GetStackParameters().is_pinned) {
                 // Verify that the current thread isn't terminating.
                 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                         Svc::ResultTerminationRequested);
+                         ResultTerminationRequested);
 
                 // Note that the thread was pinned.
                 thread_is_pinned = true;
@@ -604,7 +603,7 @@ void KThread::WaitCancel() {
             sleeping_queue->WakeupThread(this);
             wait_cancelled = true;
         } else {
-            SetSyncedObject(nullptr, Svc::ResultCancelled);
+            SetSyncedObject(nullptr, ResultCancelled);
             SetState(ThreadState::Runnable);
             wait_cancelled = false;
         }
@@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
         // Verify our state.
         const auto cur_state = GetState();
         R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable),
-                 Svc::ResultInvalidState);
+                 ResultInvalidState);
 
         // Either pause or resume.
         if (activity == Svc::ThreadActivity::Paused) {
             // Verify that we're not suspended.
-            R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+            R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
 
             // Suspend.
             RequestSuspend(SuspendType::Thread);
@@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
             ASSERT(activity == Svc::ThreadActivity::Runnable);
 
             // Verify that we're suspended.
-            R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+            R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
 
             // Resume.
             Resume(SuspendType::Thread);
@@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
         if (GetStackParameters().is_pinned) {
             // Verify that the current thread isn't terminating.
             R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                     Svc::ResultTerminationRequested);
+                     ResultTerminationRequested);
 
             // Note that the thread was pinned and not current.
             thread_is_pinned = true;
@@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) {
         KScopedSchedulerLock sl{kernel};
 
         // Verify that we're suspended.
-        R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+        R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);
 
         // If we're not terminating, get the thread's user context.
         if (!IsTerminationRequested()) {
@@ -905,12 +904,11 @@ ResultCode KThread::Run() {
         KScopedSchedulerLock lk{kernel};
 
         // If either this thread or the current thread are requesting termination, note it.
-        R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested);
-        R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                 Svc::ResultTerminationRequested);
+        R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested);
+        R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested);
 
         // Ensure our thread state is correct.
-        R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState);
+        R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState);
 
         // If the current thread has been asked to suspend, suspend it and retry.
         if (GetCurrentThread(kernel).IsSuspended()) {
@@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) {
         // Check if the thread should terminate.
         if (IsTerminationRequested()) {
             slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
         }
 
         // Mark the thread as waiting.
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index b20c2d13a..b6e6f115e 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -26,7 +26,6 @@
 #include "core/device_memory.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
@@ -39,6 +38,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/service_thread.h"
 #include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -141,11 +141,17 @@ struct KernelCore::Impl {
         ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess());
         ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200)
                    .IsSuccess());
-        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess());
+        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess());
 
-        if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) {
+        // Derived from recent software updates. The kernel reserves 27MB
+        constexpr u64 kernel_size{0x1b00000};
+        if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) {
             UNREACHABLE();
         }
+        // Reserve secure applet memory, introduced in firmware 5.0.0
+        constexpr u64 secure_applet_memory_size{0x400000};
+        ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
+                                              secure_applet_memory_size));
     }
 
     void InitializePreemption(KernelCore& kernel) {
@@ -302,8 +308,11 @@ struct KernelCore::Impl {
         // Allocate slab heaps
         user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>();
 
+        constexpr u64 user_slab_heap_size{0x1ef000};
+        // Reserve slab heaps
+        ASSERT(
+            system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
         // Initialize slab heaps
-        constexpr u64 user_slab_heap_size{0x3de000};
         user_slab_heap_pages->Initialize(
             system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
             user_slab_heap_size);
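
As a sanity check on the constants introduced above (a standalone sketch, not part of the patch): the three boot-time reservations together remove roughly 33 MiB from the system PhysicalMemory limit before any process runs.

#include <cstdint>

// Values copied from the kernel.cpp hunks above.
constexpr std::uint64_t kernel_size{0x1b00000};              // 27 MiB reserved for the kernel
constexpr std::uint64_t secure_applet_memory_size{0x400000}; // 4 MiB, firmware 5.0.0+
constexpr std::uint64_t user_slab_heap_size{0x1ef000};       // ~1.9 MiB of user slab heap

// Total reserved from LimitableResource::PhysicalMemory at initialization.
static_assert(kernel_size + secure_applet_memory_size + user_slab_heap_size == 0x20ef000,
              "about 32.9 MiB in total");
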
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index acf13585c..77f135cdc 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -8,9 +8,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/scope_exit.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/memory/memory_manager.h"
 #include "core/hle/kernel/memory/page_linked_list.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel::Memory {
 
@@ -95,7 +95,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
     // Choose a heap based on our page size request
     const s32 heap_index{PageHeap::GetBlockIndex(num_pages)};
     if (heap_index < 0) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     // TODO (bunnei): Support multiple managers
@@ -140,7 +140,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
 
     // Only succeed if we allocated as many pages as we wanted
     if (num_pages) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     // We succeeded!
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp
index 7de91c768..00ed9b881 100644
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -6,8 +6,7 @@
 #include "common/assert.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/address_space_info.h"
 #include "core/hle/kernel/memory/memory_block.h"
@@ -16,6 +15,7 @@
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/memory/system_control.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/memory.h"
 
 namespace Kernel::Memory {
@@ -141,7 +141,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
         (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)};
     if (alloc_size < needed_size) {
         UNREACHABLE();
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     const std::size_t remaining_size{alloc_size - needed_size};
@@ -277,11 +277,11 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt
     const u64 size{num_pages * PageSize};
 
     if (!CanContain(addr, size, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (IsRegionMapped(addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     PageLinkedList page_linked_list;
@@ -307,7 +307,7 @@ ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::
                                  MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
 
     if (IsRegionMapped(dst_addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     PageLinkedList page_linked_list;
@@ -409,27 +409,25 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
         return RESULT_SUCCESS;
     }
 
-    auto process{system.Kernel().CurrentProcess()};
     const std::size_t remaining_size{size - mapped_size};
     const std::size_t remaining_pages{remaining_size / PageSize};
 
-    if (process->GetResourceLimit() &&
-        !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) {
-        return ERR_RESOURCE_LIMIT_EXCEEDED;
+    // Reserve the memory from the process resource limit.
+    KScopedResourceReservation memory_reservation(
+        system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+        remaining_size);
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size);
+        return ResultResourceLimitedExceeded;
     }
 
     PageLinkedList page_linked_list;
-    {
-        auto block_guard = detail::ScopeExit([&] {
-            system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool);
-            process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size);
-        });
 
-        CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages,
-                                                              memory_pool));
+    CASCADE_CODE(
+        system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool));
 
-        block_guard.Cancel();
-    }
+    // We succeeded, so commit the memory reservation.
+    memory_reservation.Commit();
 
     MapPhysicalMemory(page_linked_list, addr, end_addr);
 
@@ -454,12 +452,12 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
     block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
         if (info.state == MemoryState::Normal) {
            if (info.attribute != MemoryAttribute::None) {
-                result = ERR_INVALID_ADDRESS_STATE;
+                result = ResultInvalidCurrentMemory;
                 return;
             }
             mapped_size += GetSizeInRange(info, addr, end_addr);
         } else if (info.state != MemoryState::Free) {
-            result = ERR_INVALID_ADDRESS_STATE;
+            result = ResultInvalidCurrentMemory;
         }
     });
 
@@ -526,7 +524,7 @@ ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
         MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));
 
     if (IsRegionMapped(dst_addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     PageLinkedList page_linked_list;
@@ -577,7 +575,7 @@ ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     AddRegionToPages(dst_addr, num_pages, dst_pages);
 
     if (!dst_pages.IsEqual(src_pages)) {
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
     }
 
     {
@@ -626,11 +624,11 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem
     const std::size_t size{num_pages * PageSize};
 
     if (!CanContain(addr, size, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (IsRegionMapped(addr, num_pages * PageSize)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }
 
     CASCADE_CODE(MapPages(addr, page_linked_list, perm));
@@ -768,7 +766,7 @@ ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
 ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
 
     if (size > heap_region_end - heap_region_start) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     const u64 previous_heap_size{GetHeapSize()};
@@ -781,10 +779,14 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
 
     const u64 delta{size - previous_heap_size};
 
-    auto process{system.Kernel().CurrentProcess()};
-    if (process->GetResourceLimit() && delta != 0 &&
-        !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) {
-        return ERR_RESOURCE_LIMIT_EXCEEDED;
+    // Reserve memory for the heap extension.
+    KScopedResourceReservation memory_reservation(
+        system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+        delta);
+
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
+        return ResultResourceLimitedExceeded;
     }
 
     PageLinkedList page_linked_list;
@@ -794,12 +796,15 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
         system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
 
     if (IsRegionMapped(current_heap_addr, delta)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     CASCADE_CODE(
         Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
 
+    // Succeeded in allocation; commit the resource reservation
+    memory_reservation.Commit();
+
     block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal,
                           MemoryPermission::ReadAndWrite);
 
@@ -816,17 +821,17 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
     std::lock_guard lock{page_table_lock};
 
     if (!CanContain(region_start, region_num_pages * PageSize, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
 
     if (region_num_pages <= needed_num_pages) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     const VAddr addr{
         AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)};
     if (!addr) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
     }
 
     if (is_map_only) {
@@ -1105,13 +1110,13 @@ constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryS
                                                  MemoryAttribute attr) const {
     // Validate the states match expectation
     if ((info.state & state_mask) != state) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
     if ((info.perm & perm_mask) != perm) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
     }
     if ((info.attribute & attr_mask) != attr) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }
 
     return RESULT_SUCCESS;
@@ -1138,14 +1143,14 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission*
     while (true) {
         // Validate the current block
         if (!(info.state == first_state)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
         }
         if (!(info.perm == first_perm)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
        }
         if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) ==
               (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
         }
 
         // Validate against the provided masks
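
The CheckMemoryState hunks above all apply the same mask test: a block passes only when its state, permissions, and attributes match the expected values under the given masks, and any mismatch now yields ResultInvalidCurrentMemory. In isolation, the shape of that check looks like the following toy sketch (self-contained illustration, not yuzu code):

#include <cstdint>

// Toy stand-ins for the kernel's memory-state enums.
enum class MemoryState : std::uint32_t { Free = 0, Normal = 0x10, Code = 0x20, Mask = 0xFF };

// Mirrors the kernel's "(info.state & state_mask) != state -> reject" test,
// phrased here as a positive match.
constexpr bool StateMatches(MemoryState value, MemoryState mask, MemoryState expected) {
    return (static_cast<std::uint32_t>(value) & static_cast<std::uint32_t>(mask)) ==
           static_cast<std::uint32_t>(expected);
}

static_assert(StateMatches(MemoryState::Normal, MemoryState::Mask, MemoryState::Normal));
static_assert(!StateMatches(MemoryState::Code, MemoryState::Mask, MemoryState::Normal));
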
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 2286b292d..47b3ac57b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -14,9 +14,9 @@
 #include "core/device_memory.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/code_set.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block_manager.h"
@@ -39,6 +39,7 @@ namespace {
 */
 void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
     const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
+    ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
     auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0,
                                       owner_process.GetIdealCoreId(), stack_top, &owner_process);
 
@@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
 
     std::shared_ptr<Process> process = std::make_shared<Process>(system);
     process->name = std::move(name);
+
+    // TODO: This is inaccurate
+    // The process should hold a reference to the kernel-wide resource limit.
     process->resource_limit = std::make_shared<KResourceLimit>(kernel, system);
     process->status = ProcessStatus::Created;
     process->program_id = 0;
@@ -155,6 +159,9 @@ void Process::DecrementThreadCount() {
 }
 
 u64 Process::GetTotalPhysicalMemoryAvailable() const {
+    // TODO: This is expected to always return the application memory pool size after accurately
+    // reserving kernel resources. The current workaround uses a process-local resource limit of
+    // application memory pool size, which is inaccurate.
     const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
                        page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
                        main_thread_stack_size};
@@ -248,8 +255,8 @@ ResultCode Process::Reset() {
     KScopedSchedulerLock sl{kernel};
 
     // Validate that we're in a state that we can reset.
-    R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState);
-    R_UNLESS(is_signaled, Svc::ResultInvalidState);
+    R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState);
+    R_UNLESS(is_signaled, ResultInvalidState);
 
     // Clear signaled.
     is_signaled = false;
@@ -264,6 +271,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
     system_resource_size = metadata.GetSystemResourceSize();
     image_size = code_size;
 
+    // Set initial resource limits
+    resource_limit->SetLimitValue(
+        LimitableResource::PhysicalMemory,
+        kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
+    KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+                                                  code_size + system_resource_size);
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes",
+                  code_size + system_resource_size);
+        return ResultResourceLimitedExceeded;
+    }
     // Initialize proces address space
     if (const ResultCode result{
             page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000,
@@ -305,24 +323,22 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
         UNREACHABLE();
     }
 
-    // Set initial resource limits
-    resource_limit->SetLimitValue(
-        LimitableResource::PhysicalMemory,
-        kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
     resource_limit->SetLimitValue(LimitableResource::Threads, 608);
     resource_limit->SetLimitValue(LimitableResource::Events, 700);
     resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128);
     resource_limit->SetLimitValue(LimitableResource::Sessions, 894);
-    ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size));
 
     // Create TLS region
     tls_region_address = CreateTLSRegion();
+    memory_reservation.Commit();
 
     return handle_table.SetSize(capabilities.GetHandleTableSize());
 }
 
 void Process::Run(s32 main_thread_priority, u64 stack_size) {
     AllocateMainThreadStack(stack_size);
+    resource_limit->Reserve(LimitableResource::Threads, 1);
+    resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 
     const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
     ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
@@ -330,8 +346,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
     ChangeStatus(ProcessStatus::Running);
 
     SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
-    resource_limit->Reserve(LimitableResource::Threads, 1);
-    resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 }
 
 void Process::PrepareForTermination() {
@@ -358,6 +372,11 @@ void Process::PrepareForTermination() {
     FreeTLSRegion(tls_region_address);
     tls_region_address = 0;
 
+    if (resource_limit) {
+        resource_limit->Release(LimitableResource::PhysicalMemory,
+                                main_thread_stack_size + image_size);
+    }
+
     ChangeStatus(ProcessStatus::Exited);
 }
 
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 0566311b6..7c567049e 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -6,10 +6,10 @@
 
 #include "common/bit_util.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 namespace {
@@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
             // If there's only one, then there's a problem.
             if (i >= num_capabilities) {
                 LOG_ERROR(Kernel, "Invalid combination! i={}", i);
-                return ERR_INVALID_COMBINATION;
+                return ResultInvalidCombination;
             }
 
             const auto size_flags = capabilities[i];
             if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) {
                 LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags);
-                return ERR_INVALID_COMBINATION;
+                return ResultInvalidCombination;
             }
 
             const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table);
@@ -159,7 +159,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
     const auto type = GetCapabilityType(flag);
 
     if (type == CapabilityType::Unset) {
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
     }
 
     // Bail early on ignorable entries, as one would expect,
@@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
         LOG_ERROR(Kernel,
                   "Attempted to initialize flags that may only be initialized once. set_flags={}",
                   set_flags);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
     set_flags |= set_flag;
 
@@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
     }
 
     LOG_ERROR(Kernel, "Invalid capability type! type={}", type);
-    return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+    return ResultInvalidCapabilityDescriptor;
 }
 
 void ProcessCapabilities::Clear() {
@@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
     if (priority_mask != 0 || core_mask != 0) {
         LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}",
                   priority_mask, core_mask);
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
     }
 
     const u32 core_num_min = (flags >> 16) & 0xFF;
@@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
     if (core_num_min > core_num_max) {
         LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}",
                   core_num_min, core_num_max);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
 
     const u32 priority_min = (flags >> 10) & 0x3F;
@@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
         LOG_ERROR(Kernel,
                   "Priority min is greater than priority max! priority_min={}, priority_max={}",
                   core_num_min, priority_max);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
 
     // The switch only has 4 usable cores.
     if (core_num_max >= 4) {
         LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max);
-        return ERR_INVALID_PROCESSOR_ID;
+        return ResultInvalidCoreId;
     }
 
     const auto make_mask = [](u64 min, u64 max) {
@@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
 
     // If we've already set this svc before, bail.
     if ((set_svc_bits & svc_bit) != 0) {
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
     }
     set_svc_bits |= svc_bit;
 
@@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
 
     if (svc_number >= svc_capabilities.size()) {
         LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
     }
 
     svc_capabilities[svc_number] = true;
@@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
         if (interrupt >= interrupt_capabilities.size()) {
             LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}",
                       interrupt);
-            return ERR_OUT_OF_RANGE;
+            return ResultOutOfRange;
         }
 
         interrupt_capabilities[interrupt] = true;
@@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) {
     const u32 reserved = flags >> 17;
     if (reserved != 0) {
         LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
     }
 
     program_type = static_cast<ProgramType>((flags >> 14) & 0b111);
@@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
         LOG_ERROR(Kernel,
                   "Kernel version is non zero or flags are too small! major_version={}, flags={}",
                   major_version, flags);
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
     }
 
     kernel_version = flags;
@@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
     const u32 reserved = flags >> 26;
     if (reserved != 0) {
         LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
     }
 
     handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
@@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) {
     const u32 reserved = flags >> 19;
     if (reserved != 0) {
         LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
     }
 
     is_debuggable = (flags & 0x20000) != 0;
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp
index fe7a483c4..5d17346ad 100644
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -5,11 +5,11 @@
 #include <tuple>
 #include "common/assert.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
 
 namespace Kernel {
 
@@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default;
 
 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
     if (pending_sessions.empty()) {
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
     }
 
     auto session = std::move(pending_sessions.back());
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp
index 75304b961..8830d4e91 100644
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -4,15 +4,23 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/hle/kernel/client_session.h" 6#include "core/hle/kernel/client_session.h"
7#include "core/hle/kernel/k_scoped_resource_reservation.h"
7#include "core/hle/kernel/server_session.h" 8#include "core/hle/kernel/server_session.h"
8#include "core/hle/kernel/session.h" 9#include "core/hle/kernel/session.h"
9 10
10namespace Kernel { 11namespace Kernel {
11 12
12Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {} 13Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
13Session::~Session() = default; 14Session::~Session() {
15 // Release the resource that was reserved when the Session pair was created.
16 kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1);
17}
14 18
15Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { 19Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
20 // Reserve a new session from the resource limit.
21 KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(),
22 LimitableResource::Sessions);
23 ASSERT(session_reservation.Succeeded());
16 auto session{std::make_shared<Session>(kernel)}; 24 auto session{std::make_shared<Session>(kernel)};
17 auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()}; 25 auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()};
18 auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()}; 26 auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()};
@@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
21 session->client = client_session; 29 session->client = client_session;
22 session->server = server_session; 30 session->server = server_session;
23 31
32 session_reservation.Commit();
24 return std::make_pair(std::move(client_session), std::move(server_session)); 33 return std::make_pair(std::move(client_session), std::move(server_session));
25} 34}
26 35
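session.cpp is the simplest consumer of the new k_scoped_resource_reservation.h guard from the diffstat: reserve in the constructor, Commit() once the object is fully built, and let the destructor roll the charge back on any early exit. A condensed sketch of the guard under those assumptions (paraphrased, not the verbatim header; the svc.cpp hunks further down imply an additional constructor taking a Process*):

    #include <memory>
    #include <utility>

    class KScopedResourceReservation {
    public:
        KScopedResourceReservation(std::shared_ptr<KResourceLimit> limit,
                                   LimitableResource resource, s64 value = 1)
            : resource_limit(std::move(limit)), value(value), resource(resource) {
            // Try to take `value` units up front; Succeeded() reports the outcome.
            success = !resource_limit || resource_limit->Reserve(resource, value);
        }

        ~KScopedResourceReservation() {
            if (resource_limit && success) {
                // Never committed: return the reserved units to the limit.
                resource_limit->Release(resource, value);
            }
        }

        // Hand ownership of the reservation to the created object, which is
        // then expected to Release() in its own destructor.
        void Commit() {
            resource_limit = nullptr;
        }

        bool Succeeded() const {
            return success;
        }

    private:
        std::shared_ptr<KResourceLimit> resource_limit;
        s64 value;
        LimitableResource resource;
        bool success;
    };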
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 0cd467110..2eadd51d7 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/core.h" 6#include "core/core.h"
7#include "core/hle/kernel/k_scoped_resource_reservation.h"
7#include "core/hle/kernel/kernel.h" 8#include "core/hle/kernel/kernel.h"
8#include "core/hle/kernel/memory/page_table.h" 9#include "core/hle/kernel/memory/page_table.h"
9#include "core/hle/kernel/shared_memory.h" 10#include "core/hle/kernel/shared_memory.h"
@@ -13,7 +14,9 @@ namespace Kernel {
13SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) 14SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory)
14 : Object{kernel}, device_memory{device_memory} {} 15 : Object{kernel}, device_memory{device_memory} {}
15 16
16SharedMemory::~SharedMemory() = default; 17SharedMemory::~SharedMemory() {
18 kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size);
19}
17 20
18std::shared_ptr<SharedMemory> SharedMemory::Create( 21std::shared_ptr<SharedMemory> SharedMemory::Create(
19 KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, 22 KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
@@ -21,6 +24,11 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
21 Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, 24 Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size,
22 std::string name) { 25 std::string name) {
23 26
27 const auto resource_limit = kernel.GetSystemResourceLimit();
28 KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
29 size);
30 ASSERT(memory_reservation.Succeeded());
31
24 std::shared_ptr<SharedMemory> shared_memory{ 32 std::shared_ptr<SharedMemory> shared_memory{
25 std::make_shared<SharedMemory>(kernel, device_memory)}; 33 std::make_shared<SharedMemory>(kernel, device_memory)};
26 34
@@ -32,6 +40,7 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
32 shared_memory->size = size; 40 shared_memory->size = size;
33 shared_memory->name = name; 41 shared_memory->name = name;
34 42
43 memory_reservation.Commit();
35 return shared_memory; 44 return shared_memory;
36} 45}
37 46
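The shared_memory.cpp change applies the same choreography to PhysicalMemory: Create() commits the reservation only after the object is fully initialized, and the new destructor releases the same `size` again. A hypothetical caller, names invented purely for illustration, to show the lifecycle end to end:

    ResultCode ChargeAndBuild(KernelCore& kernel) {
        constexpr s64 size = 0x1000; // illustrative amount
        KScopedResourceReservation reservation(kernel.GetSystemResourceLimit(),
                                               LimitableResource::PhysicalMemory, size);
        if (!reservation.Succeeded()) {
            return ResultResourceLimitedExceeded; // nothing was charged
        }
        // ... build the object and record `size` on it, as Create() does above,
        // so that ~SharedMemory() later releases the matching amount ...
        reservation.Commit();
        return RESULT_SUCCESS;
    }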
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 26650a513..31d899e06 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -23,7 +23,6 @@
23#include "core/cpu_manager.h" 23#include "core/cpu_manager.h"
24#include "core/hle/kernel/client_port.h" 24#include "core/hle/kernel/client_port.h"
25#include "core/hle/kernel/client_session.h" 25#include "core/hle/kernel/client_session.h"
26#include "core/hle/kernel/errors.h"
27#include "core/hle/kernel/handle_table.h" 26#include "core/hle/kernel/handle_table.h"
28#include "core/hle/kernel/k_address_arbiter.h" 27#include "core/hle/kernel/k_address_arbiter.h"
29#include "core/hle/kernel/k_condition_variable.h" 28#include "core/hle/kernel/k_condition_variable.h"
@@ -31,6 +30,7 @@
31#include "core/hle/kernel/k_readable_event.h" 30#include "core/hle/kernel/k_readable_event.h"
32#include "core/hle/kernel/k_resource_limit.h" 31#include "core/hle/kernel/k_resource_limit.h"
33#include "core/hle/kernel/k_scheduler.h" 32#include "core/hle/kernel/k_scheduler.h"
33#include "core/hle/kernel/k_scoped_resource_reservation.h"
34#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" 34#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
35#include "core/hle/kernel/k_synchronization_object.h" 35#include "core/hle/kernel/k_synchronization_object.h"
36#include "core/hle/kernel/k_thread.h" 36#include "core/hle/kernel/k_thread.h"
@@ -71,49 +71,49 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
71 VAddr src_addr, u64 size) { 71 VAddr src_addr, u64 size) {
72 if (!Common::Is4KBAligned(dst_addr)) { 72 if (!Common::Is4KBAligned(dst_addr)) {
73 LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr); 73 LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr);
74 return ERR_INVALID_ADDRESS; 74 return ResultInvalidAddress;
75 } 75 }
76 76
77 if (!Common::Is4KBAligned(src_addr)) { 77 if (!Common::Is4KBAligned(src_addr)) {
78 LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr); 78 LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr);
79 return ERR_INVALID_SIZE; 79 return ResultInvalidSize;
80 } 80 }
81 81
82 if (size == 0) { 82 if (size == 0) {
83 LOG_ERROR(Kernel_SVC, "Size is 0"); 83 LOG_ERROR(Kernel_SVC, "Size is 0");
84 return ERR_INVALID_SIZE; 84 return ResultInvalidSize;
85 } 85 }
86 86
87 if (!Common::Is4KBAligned(size)) { 87 if (!Common::Is4KBAligned(size)) {
88 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size); 88 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size);
89 return ERR_INVALID_SIZE; 89 return ResultInvalidSize;
90 } 90 }
91 91
92 if (!IsValidAddressRange(dst_addr, size)) { 92 if (!IsValidAddressRange(dst_addr, size)) {
93 LOG_ERROR(Kernel_SVC, 93 LOG_ERROR(Kernel_SVC,
94 "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}", 94 "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
95 dst_addr, size); 95 dst_addr, size);
96 return ERR_INVALID_ADDRESS_STATE; 96 return ResultInvalidCurrentMemory;
97 } 97 }
98 98
99 if (!IsValidAddressRange(src_addr, size)) { 99 if (!IsValidAddressRange(src_addr, size)) {
100 LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}", 100 LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
101 src_addr, size); 101 src_addr, size);
102 return ERR_INVALID_ADDRESS_STATE; 102 return ResultInvalidCurrentMemory;
103 } 103 }
104 104
105 if (!manager.IsInsideAddressSpace(src_addr, size)) { 105 if (!manager.IsInsideAddressSpace(src_addr, size)) {
106 LOG_ERROR(Kernel_SVC, 106 LOG_ERROR(Kernel_SVC,
107 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", 107 "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
108 src_addr, size); 108 src_addr, size);
109 return ERR_INVALID_ADDRESS_STATE; 109 return ResultInvalidCurrentMemory;
110 } 110 }
111 111
112 if (manager.IsOutsideStackRegion(dst_addr, size)) { 112 if (manager.IsOutsideStackRegion(dst_addr, size)) {
113 LOG_ERROR(Kernel_SVC, 113 LOG_ERROR(Kernel_SVC,
114 "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", 114 "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}",
115 dst_addr, size); 115 dst_addr, size);
116 return ERR_INVALID_MEMORY_RANGE; 116 return ResultInvalidMemoryRange;
117 } 117 }
118 118
119 if (manager.IsInsideHeapRegion(dst_addr, size)) { 119 if (manager.IsInsideHeapRegion(dst_addr, size)) {
@@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
121 "Destination does not fit within the heap region, addr=0x{:016X}, " 121 "Destination does not fit within the heap region, addr=0x{:016X}, "
122 "size=0x{:016X}", 122 "size=0x{:016X}",
123 dst_addr, size); 123 dst_addr, size);
124 return ERR_INVALID_MEMORY_RANGE; 124 return ResultInvalidMemoryRange;
125 } 125 }
126 126
127 if (manager.IsInsideAliasRegion(dst_addr, size)) { 127 if (manager.IsInsideAliasRegion(dst_addr, size)) {
@@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
129 "Destination does not fit within the map region, addr=0x{:016X}, " 129 "Destination does not fit within the map region, addr=0x{:016X}, "
130 "size=0x{:016X}", 130 "size=0x{:016X}",
131 dst_addr, size); 131 dst_addr, size);
132 return ERR_INVALID_MEMORY_RANGE; 132 return ResultInvalidMemoryRange;
133 } 133 }
134 134
135 return RESULT_SUCCESS; 135 return RESULT_SUCCESS;
@@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
138enum class ResourceLimitValueType { 138enum class ResourceLimitValueType {
139 CurrentValue, 139 CurrentValue,
140 LimitValue, 140 LimitValue,
141 PeakValue,
141}; 142};
142 143
143ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, 144ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
@@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
146 const auto type = static_cast<LimitableResource>(resource_type); 147 const auto type = static_cast<LimitableResource>(resource_type);
147 if (!IsValidResourceType(type)) { 148 if (!IsValidResourceType(type)) {
148 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); 149 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
149 return ERR_INVALID_ENUM_VALUE; 150 return ResultInvalidEnumValue;
150 } 151 }
151 152
152 const auto* const current_process = system.Kernel().CurrentProcess(); 153 const auto* const current_process = system.Kernel().CurrentProcess();
@@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
157 if (!resource_limit_object) { 158 if (!resource_limit_object) {
158 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", 159 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
159 resource_limit); 160 resource_limit);
160 return ERR_INVALID_HANDLE; 161 return ResultInvalidHandle;
161 } 162 }
162 163
163 if (value_type == ResourceLimitValueType::CurrentValue) { 164 switch (value_type) {
165 case ResourceLimitValueType::CurrentValue:
164 return MakeResult(resource_limit_object->GetCurrentValue(type)); 166 return MakeResult(resource_limit_object->GetCurrentValue(type));
167 case ResourceLimitValueType::LimitValue:
168 return MakeResult(resource_limit_object->GetLimitValue(type));
169 case ResourceLimitValueType::PeakValue:
170 return MakeResult(resource_limit_object->GetPeakValue(type));
171 default:
172 LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type);
173 return ResultInvalidEnumValue;
165 } 174 }
166
167 return MakeResult(resource_limit_object->GetLimitValue(type));
168} 175}
169} // Anonymous namespace 176} // Anonymous namespace
170 177
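RetrieveResourceLimitValue now covers all three ResourceLimitValueType members in one switch instead of special-casing CurrentValue, and rejects anything else with ResultInvalidEnumValue. The per-SVC wrappers presumably funnel into it along these lines (wrapper shape assumed for illustration, not quoted from svc.cpp):

    static ResultCode GetResourceLimitPeakValue(Core::System& system, u64* out_peak,
                                                Handle resource_limit, u32 resource_type) {
        const auto peak = RetrieveResourceLimitValue(system, resource_limit, resource_type,
                                                     ResourceLimitValueType::PeakValue);
        if (peak.Failed()) {
            return peak.Code();
        }
        *out_peak = static_cast<u64>(*peak);
        return RESULT_SUCCESS;
    }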
@@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
177 if ((heap_size % 0x200000) != 0) { 184 if ((heap_size % 0x200000) != 0) {
178 LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}", 185 LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
179 heap_size); 186 heap_size);
180 return ERR_INVALID_SIZE; 187 return ResultInvalidSize;
181 } 188 }
182 189
183 if (heap_size >= 0x200000000) { 190 if (heap_size >= 0x200000000) {
184 LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size); 191 LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
185 return ERR_INVALID_SIZE; 192 return ResultInvalidSize;
186 } 193 }
187 194
188 auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; 195 auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -208,19 +215,19 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
208 215
209 if (!Common::Is4KBAligned(address)) { 216 if (!Common::Is4KBAligned(address)) {
210 LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address); 217 LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address);
211 return ERR_INVALID_ADDRESS; 218 return ResultInvalidAddress;
212 } 219 }
213 220
214 if (size == 0 || !Common::Is4KBAligned(size)) { 221 if (size == 0 || !Common::Is4KBAligned(size)) {
215 LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.", 222 LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.",
216 size); 223 size);
217 return ERR_INVALID_ADDRESS; 224 return ResultInvalidAddress;
218 } 225 }
219 226
220 if (!IsValidAddressRange(address, size)) { 227 if (!IsValidAddressRange(address, size)) {
221 LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})", 228 LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})",
222 address, size); 229 address, size);
223 return ERR_INVALID_ADDRESS_STATE; 230 return ResultInvalidCurrentMemory;
224 } 231 }
225 232
226 const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)}; 233 const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)};
@@ -229,7 +236,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
229 LOG_ERROR(Kernel_SVC, 236 LOG_ERROR(Kernel_SVC,
230 "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}", 237 "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}",
231 attribute, mask); 238 attribute, mask);
232 return ERR_INVALID_COMBINATION; 239 return ResultInvalidCombination;
233 } 240 }
234 241
235 auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; 242 auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
293 LOG_ERROR(Kernel_SVC, 300 LOG_ERROR(Kernel_SVC,
294 "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", 301 "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
295 port_name_address); 302 port_name_address);
296 return ERR_NOT_FOUND; 303 return ResultNotFound;
297 } 304 }
298 305
299 static constexpr std::size_t PortNameMaxLength = 11; 306 static constexpr std::size_t PortNameMaxLength = 11;
@@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
302 if (port_name.size() > PortNameMaxLength) { 309 if (port_name.size() > PortNameMaxLength) {
303 LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength, 310 LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength,
304 port_name.size()); 311 port_name.size());
305 return ERR_OUT_OF_RANGE; 312 return ResultOutOfRange;
306 } 313 }
307 314
308 LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); 315 LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
@@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
311 const auto it = kernel.FindNamedPort(port_name); 318 const auto it = kernel.FindNamedPort(port_name);
312 if (!kernel.IsValidNamedPort(it)) { 319 if (!kernel.IsValidNamedPort(it)) {
313 LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); 320 LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
314 return ERR_NOT_FOUND; 321 return ResultNotFound;
315 } 322 }
316 323
317 ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1));
318
319 auto client_port = it->second; 324 auto client_port = it->second;
320 325
321 std::shared_ptr<ClientSession> client_session; 326 std::shared_ptr<ClientSession> client_session;
@@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
340 std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); 345 std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
341 if (!session) { 346 if (!session) {
342 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); 347 LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
343 return ERR_INVALID_HANDLE; 348 return ResultInvalidHandle;
344 } 349 }
345 350
346 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); 351 LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
@@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
405 const Process* const owner_process = thread->GetOwnerProcess(); 410 const Process* const owner_process = thread->GetOwnerProcess();
406 if (!owner_process) { 411 if (!owner_process) {
407 LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered."); 412 LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered.");
408 return ERR_INVALID_HANDLE; 413 return ResultInvalidHandle;
409 } 414 }
410 415
411 *process_id = owner_process->GetProcessID(); 416 *process_id = owner_process->GetProcessID();
@@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
415 // NOTE: This should also handle debug objects before returning. 420 // NOTE: This should also handle debug objects before returning.
416 421
417 LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle); 422 LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle);
418 return ERR_INVALID_HANDLE; 423 return ResultInvalidHandle;
419} 424}
420 425
421static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, 426static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
@@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
438 LOG_ERROR(Kernel_SVC, 443 LOG_ERROR(Kernel_SVC,
439 "Handle address is not a valid virtual address, handle_address=0x{:016X}", 444 "Handle address is not a valid virtual address, handle_address=0x{:016X}",
440 handles_address); 445 handles_address);
441 return ERR_INVALID_POINTER; 446 return ResultInvalidPointer;
442 } 447 }
443 448
444 static constexpr u64 MaxHandles = 0x40; 449 static constexpr u64 MaxHandles = 0x40;
@@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
446 if (handle_count > MaxHandles) { 451 if (handle_count > MaxHandles) {
447 LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}", 452 LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}",
448 MaxHandles, handle_count); 453 MaxHandles, handle_count);
449 return ERR_OUT_OF_RANGE; 454 return ResultOutOfRange;
450 } 455 }
451 456
452 auto& kernel = system.Kernel(); 457 auto& kernel = system.Kernel();
@@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
459 464
460 if (object == nullptr) { 465 if (object == nullptr) {
461 LOG_ERROR(Kernel_SVC, "Object is a nullptr"); 466 LOG_ERROR(Kernel_SVC, "Object is a nullptr");
462 return ERR_INVALID_HANDLE; 467 return ResultInvalidHandle;
463 } 468 }
464 469
465 objects[i] = object.get(); 470 objects[i] = object.get();
@@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
481 // Get the thread from its handle. 486 // Get the thread from its handle.
482 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 487 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
483 std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle); 488 std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle);
489
484 if (!thread) { 490 if (!thread) {
485 LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle); 491 LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle);
486 return ResultInvalidHandle; 492 return ResultInvalidHandle;
@@ -525,6 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
525 LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address); 531 LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);
526 532
527 // Validate the input address. 533 // Validate the input address.
534
528 if (Memory::IsKernelAddress(address)) { 535 if (Memory::IsKernelAddress(address)) {
529 LOG_ERROR(Kernel_SVC, 536 LOG_ERROR(Kernel_SVC,
530 "Attempting to arbitrate an unlock on a kernel address (address={:08X})", 537 "Attempting to arbitrate an unlock on a kernel address (address={:08X})",
@@ -735,7 +742,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
735 if (info_sub_id != 0) { 742 if (info_sub_id != 0) {
736 LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, 743 LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
737 info_sub_id); 744 info_sub_id);
738 return ERR_INVALID_ENUM_VALUE; 745 return ResultInvalidEnumValue;
739 } 746 }
740 747
741 const auto& current_process_handle_table = 748 const auto& current_process_handle_table =
@@ -744,7 +751,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
744 if (!process) { 751 if (!process) {
745 LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}", 752 LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}",
746 info_id, info_sub_id, handle); 753 info_id, info_sub_id, handle);
747 return ERR_INVALID_HANDLE; 754 return ResultInvalidHandle;
748 } 755 }
749 756
750 switch (info_id_type) { 757 switch (info_id_type) {
@@ -826,7 +833,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
826 } 833 }
827 834
828 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); 835 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
829 return ERR_INVALID_ENUM_VALUE; 836 return ResultInvalidEnumValue;
830 } 837 }
831 838
832 case GetInfoType::IsCurrentProcessBeingDebugged: 839 case GetInfoType::IsCurrentProcessBeingDebugged:
@@ -836,13 +843,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
836 case GetInfoType::RegisterResourceLimit: { 843 case GetInfoType::RegisterResourceLimit: {
837 if (handle != 0) { 844 if (handle != 0) {
838 LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle); 845 LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle);
839 return ERR_INVALID_HANDLE; 846 return ResultInvalidHandle;
840 } 847 }
841 848
842 if (info_sub_id != 0) { 849 if (info_sub_id != 0) {
843 LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, 850 LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
844 info_sub_id); 851 info_sub_id);
845 return ERR_INVALID_COMBINATION; 852 return ResultInvalidCombination;
846 } 853 }
847 854
848 Process* const current_process = system.Kernel().CurrentProcess(); 855 Process* const current_process = system.Kernel().CurrentProcess();
@@ -867,13 +874,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
867 if (handle != 0) { 874 if (handle != 0) {
868 LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}", 875 LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}",
869 handle); 876 handle);
870 return ERR_INVALID_HANDLE; 877 return ResultInvalidHandle;
871 } 878 }
872 879
873 if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) { 880 if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) {
874 LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}", 881 LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}",
875 Process::RANDOM_ENTROPY_SIZE, info_sub_id); 882 Process::RANDOM_ENTROPY_SIZE, info_sub_id);
876 return ERR_INVALID_COMBINATION; 883 return ResultInvalidCombination;
877 } 884 }
878 885
879 *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id); 886 *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
@@ -890,7 +897,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
890 if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) { 897 if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
891 LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus, 898 LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus,
892 info_sub_id); 899 info_sub_id);
893 return ERR_INVALID_COMBINATION; 900 return ResultInvalidCombination;
894 } 901 }
895 902
896 const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>( 903 const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>(
@@ -898,7 +905,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
898 if (!thread) { 905 if (!thread) {
899 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", 906 LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
900 static_cast<Handle>(handle)); 907 static_cast<Handle>(handle));
901 return ERR_INVALID_HANDLE; 908 return ResultInvalidHandle;
902 } 909 }
903 910
904 const auto& core_timing = system.CoreTiming(); 911 const auto& core_timing = system.CoreTiming();
@@ -922,7 +929,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
922 929
923 default: 930 default:
924 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); 931 LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
925 return ERR_INVALID_ENUM_VALUE; 932 return ResultInvalidEnumValue;
926 } 933 }
927} 934}
928 935
@@ -945,22 +952,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
945 952
946 if (!Common::Is4KBAligned(addr)) { 953 if (!Common::Is4KBAligned(addr)) {
947 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); 954 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
948 return ERR_INVALID_ADDRESS; 955 return ResultInvalidAddress;
949 } 956 }
950 957
951 if (!Common::Is4KBAligned(size)) { 958 if (!Common::Is4KBAligned(size)) {
952 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); 959 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
953 return ERR_INVALID_SIZE; 960 return ResultInvalidSize;
954 } 961 }
955 962
956 if (size == 0) { 963 if (size == 0) {
957 LOG_ERROR(Kernel_SVC, "Size is zero"); 964 LOG_ERROR(Kernel_SVC, "Size is zero");
958 return ERR_INVALID_SIZE; 965 return ResultInvalidSize;
959 } 966 }
960 967
961 if (!(addr < addr + size)) { 968 if (!(addr < addr + size)) {
962 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); 969 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
963 return ERR_INVALID_MEMORY_RANGE; 970 return ResultInvalidMemoryRange;
964 } 971 }
965 972
966 Process* const current_process{system.Kernel().CurrentProcess()}; 973 Process* const current_process{system.Kernel().CurrentProcess()};
@@ -968,21 +975,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
968 975
969 if (current_process->GetSystemResourceSize() == 0) { 976 if (current_process->GetSystemResourceSize() == 0) {
970 LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); 977 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
971 return ERR_INVALID_STATE; 978 return ResultInvalidState;
972 } 979 }
973 980
974 if (!page_table.IsInsideAddressSpace(addr, size)) { 981 if (!page_table.IsInsideAddressSpace(addr, size)) {
975 LOG_ERROR(Kernel_SVC, 982 LOG_ERROR(Kernel_SVC,
976 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 983 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
977 size); 984 size);
978 return ERR_INVALID_MEMORY_RANGE; 985 return ResultInvalidMemoryRange;
979 } 986 }
980 987
981 if (page_table.IsOutsideAliasRegion(addr, size)) { 988 if (page_table.IsOutsideAliasRegion(addr, size)) {
982 LOG_ERROR(Kernel_SVC, 989 LOG_ERROR(Kernel_SVC,
983 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, 990 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
984 size); 991 size);
985 return ERR_INVALID_MEMORY_RANGE; 992 return ResultInvalidMemoryRange;
986 } 993 }
987 994
988 return page_table.MapPhysicalMemory(addr, size); 995 return page_table.MapPhysicalMemory(addr, size);
@@ -999,22 +1006,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
999 1006
1000 if (!Common::Is4KBAligned(addr)) { 1007 if (!Common::Is4KBAligned(addr)) {
1001 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); 1008 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
1002 return ERR_INVALID_ADDRESS; 1009 return ResultInvalidAddress;
1003 } 1010 }
1004 1011
1005 if (!Common::Is4KBAligned(size)) { 1012 if (!Common::Is4KBAligned(size)) {
1006 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); 1013 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
1007 return ERR_INVALID_SIZE; 1014 return ResultInvalidSize;
1008 } 1015 }
1009 1016
1010 if (size == 0) { 1017 if (size == 0) {
1011 LOG_ERROR(Kernel_SVC, "Size is zero"); 1018 LOG_ERROR(Kernel_SVC, "Size is zero");
1012 return ERR_INVALID_SIZE; 1019 return ResultInvalidSize;
1013 } 1020 }
1014 1021
1015 if (!(addr < addr + size)) { 1022 if (!(addr < addr + size)) {
1016 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); 1023 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
1017 return ERR_INVALID_MEMORY_RANGE; 1024 return ResultInvalidMemoryRange;
1018 } 1025 }
1019 1026
1020 Process* const current_process{system.Kernel().CurrentProcess()}; 1027 Process* const current_process{system.Kernel().CurrentProcess()};
@@ -1022,21 +1029,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
1022 1029
1023 if (current_process->GetSystemResourceSize() == 0) { 1030 if (current_process->GetSystemResourceSize() == 0) {
1024 LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); 1031 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
1025 return ERR_INVALID_STATE; 1032 return ResultInvalidState;
1026 } 1033 }
1027 1034
1028 if (!page_table.IsInsideAddressSpace(addr, size)) { 1035 if (!page_table.IsInsideAddressSpace(addr, size)) {
1029 LOG_ERROR(Kernel_SVC, 1036 LOG_ERROR(Kernel_SVC,
1030 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, 1037 "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
1031 size); 1038 size);
1032 return ERR_INVALID_MEMORY_RANGE; 1039 return ResultInvalidMemoryRange;
1033 } 1040 }
1034 1041
1035 if (page_table.IsOutsideAliasRegion(addr, size)) { 1042 if (page_table.IsOutsideAliasRegion(addr, size)) {
1036 LOG_ERROR(Kernel_SVC, 1043 LOG_ERROR(Kernel_SVC,
1037 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, 1044 "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
1038 size); 1045 size);
1039 return ERR_INVALID_MEMORY_RANGE; 1046 return ResultInvalidMemoryRange;
1040 } 1047 }
1041 1048
1042 return page_table.UnmapPhysicalMemory(addr, size); 1049 return page_table.UnmapPhysicalMemory(addr, size);
@@ -1206,23 +1213,23 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1206 1213
1207 if (!Common::Is4KBAligned(addr)) { 1214 if (!Common::Is4KBAligned(addr)) {
1208 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); 1215 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr);
1209 return ERR_INVALID_ADDRESS; 1216 return ResultInvalidAddress;
1210 } 1217 }
1211 1218
1212 if (size == 0) { 1219 if (size == 0) {
1213 LOG_ERROR(Kernel_SVC, "Size is 0"); 1220 LOG_ERROR(Kernel_SVC, "Size is 0");
1214 return ERR_INVALID_SIZE; 1221 return ResultInvalidSize;
1215 } 1222 }
1216 1223
1217 if (!Common::Is4KBAligned(size)) { 1224 if (!Common::Is4KBAligned(size)) {
1218 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); 1225 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size);
1219 return ERR_INVALID_SIZE; 1226 return ResultInvalidSize;
1220 } 1227 }
1221 1228
1222 if (!IsValidAddressRange(addr, size)) { 1229 if (!IsValidAddressRange(addr, size)) {
1223 LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", 1230 LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
1224 addr, size); 1231 addr, size);
1225 return ERR_INVALID_ADDRESS_STATE; 1232 return ResultInvalidCurrentMemory;
1226 } 1233 }
1227 1234
1228 const auto permission_type = static_cast<Memory::MemoryPermission>(permissions); 1235 const auto permission_type = static_cast<Memory::MemoryPermission>(permissions);
@@ -1230,7 +1237,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1230 Memory::MemoryPermission::ReadAndWrite) { 1237 Memory::MemoryPermission::ReadAndWrite) {
1231 LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}", 1238 LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}",
1232 permissions); 1239 permissions);
1233 return ERR_INVALID_MEMORY_PERMISSIONS; 1240 return ResultInvalidMemoryPermissions;
1234 } 1241 }
1235 1242
1236 auto* const current_process{system.Kernel().CurrentProcess()}; 1243 auto* const current_process{system.Kernel().CurrentProcess()};
@@ -1241,7 +1248,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1241 "Addr does not fit within the valid region, addr=0x{:016X}, " 1248 "Addr does not fit within the valid region, addr=0x{:016X}, "
1242 "size=0x{:016X}", 1249 "size=0x{:016X}",
1243 addr, size); 1250 addr, size);
1244 return ERR_INVALID_MEMORY_RANGE; 1251 return ResultInvalidMemoryRange;
1245 } 1252 }
1246 1253
1247 if (page_table.IsInsideHeapRegion(addr, size)) { 1254 if (page_table.IsInsideHeapRegion(addr, size)) {
@@ -1249,7 +1256,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1249 "Addr does not fit within the heap region, addr=0x{:016X}, " 1256 "Addr does not fit within the heap region, addr=0x{:016X}, "
1250 "size=0x{:016X}", 1257 "size=0x{:016X}",
1251 addr, size); 1258 addr, size);
1252 return ERR_INVALID_MEMORY_RANGE; 1259 return ResultInvalidMemoryRange;
1253 } 1260 }
1254 1261
1255 if (page_table.IsInsideAliasRegion(addr, size)) { 1262 if (page_table.IsInsideAliasRegion(addr, size)) {
@@ -1257,14 +1264,14 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
1257 "Address does not fit within the map region, addr=0x{:016X}, " 1264 "Address does not fit within the map region, addr=0x{:016X}, "
1258 "size=0x{:016X}", 1265 "size=0x{:016X}",
1259 addr, size); 1266 addr, size);
1260 return ERR_INVALID_MEMORY_RANGE; 1267 return ResultInvalidMemoryRange;
1261 } 1268 }
1262 1269
1263 auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)}; 1270 auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)};
1264 if (!shared_memory) { 1271 if (!shared_memory) {
1265 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", 1272 LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
1266 shared_memory_handle); 1273 shared_memory_handle);
1267 return ERR_INVALID_HANDLE; 1274 return ResultInvalidHandle;
1268 } 1275 }
1269 1276
1270 return shared_memory->Map(*current_process, addr, size, permission_type); 1277 return shared_memory->Map(*current_process, addr, size, permission_type);
@@ -1285,7 +1292,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add
1285 if (!process) { 1292 if (!process) {
1286 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", 1293 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
1287 process_handle); 1294 process_handle);
1288 return ERR_INVALID_HANDLE; 1295 return ResultInvalidHandle;
1289 } 1296 }
1290 1297
1291 auto& memory{system.Memory()}; 1298 auto& memory{system.Memory()};
@@ -1332,18 +1339,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1332 if (!Common::Is4KBAligned(src_address)) { 1339 if (!Common::Is4KBAligned(src_address)) {
1333 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", 1340 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
1334 src_address); 1341 src_address);
1335 return ERR_INVALID_ADDRESS; 1342 return ResultInvalidAddress;
1336 } 1343 }
1337 1344
1338 if (!Common::Is4KBAligned(dst_address)) { 1345 if (!Common::Is4KBAligned(dst_address)) {
1339 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", 1346 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
1340 dst_address); 1347 dst_address);
1341 return ERR_INVALID_ADDRESS; 1348 return ResultInvalidAddress;
1342 } 1349 }
1343 1350
1344 if (size == 0 || !Common::Is4KBAligned(size)) { 1351 if (size == 0 || !Common::Is4KBAligned(size)) {
1345 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); 1352 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
1346 return ERR_INVALID_SIZE; 1353 return ResultInvalidSize;
1347 } 1354 }
1348 1355
1349 if (!IsValidAddressRange(dst_address, size)) { 1356 if (!IsValidAddressRange(dst_address, size)) {
@@ -1351,7 +1358,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1351 "Destination address range overflows the address space (dst_address=0x{:016X}, " 1358 "Destination address range overflows the address space (dst_address=0x{:016X}, "
1352 "size=0x{:016X}).", 1359 "size=0x{:016X}).",
1353 dst_address, size); 1360 dst_address, size);
1354 return ERR_INVALID_ADDRESS_STATE; 1361 return ResultInvalidCurrentMemory;
1355 } 1362 }
1356 1363
1357 if (!IsValidAddressRange(src_address, size)) { 1364 if (!IsValidAddressRange(src_address, size)) {
@@ -1359,7 +1366,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1359 "Source address range overflows the address space (src_address=0x{:016X}, " 1366 "Source address range overflows the address space (src_address=0x{:016X}, "
1360 "size=0x{:016X}).", 1367 "size=0x{:016X}).",
1361 src_address, size); 1368 src_address, size);
1362 return ERR_INVALID_ADDRESS_STATE; 1369 return ResultInvalidCurrentMemory;
1363 } 1370 }
1364 1371
1365 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 1372 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1367,7 +1374,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1367 if (!process) { 1374 if (!process) {
1368 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", 1375 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
1369 process_handle); 1376 process_handle);
1370 return ERR_INVALID_HANDLE; 1377 return ResultInvalidHandle;
1371 } 1378 }
1372 1379
1373 auto& page_table = process->PageTable(); 1380 auto& page_table = process->PageTable();
@@ -1376,7 +1383,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1376 "Source address range is not within the address space (src_address=0x{:016X}, " 1383 "Source address range is not within the address space (src_address=0x{:016X}, "
1377 "size=0x{:016X}).", 1384 "size=0x{:016X}).",
1378 src_address, size); 1385 src_address, size);
1379 return ERR_INVALID_ADDRESS_STATE; 1386 return ResultInvalidCurrentMemory;
1380 } 1387 }
1381 1388
1382 if (!page_table.IsInsideASLRRegion(dst_address, size)) { 1389 if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1384,7 +1391,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
1384 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " 1391 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
1385 "size=0x{:016X}).", 1392 "size=0x{:016X}).",
1386 dst_address, size); 1393 dst_address, size);
1387 return ERR_INVALID_MEMORY_RANGE; 1394 return ResultInvalidMemoryRange;
1388 } 1395 }
1389 1396
1390 return page_table.MapProcessCodeMemory(dst_address, src_address, size); 1397 return page_table.MapProcessCodeMemory(dst_address, src_address, size);
@@ -1400,18 +1407,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1400 if (!Common::Is4KBAligned(dst_address)) { 1407 if (!Common::Is4KBAligned(dst_address)) {
1401 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", 1408 LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
1402 dst_address); 1409 dst_address);
1403 return ERR_INVALID_ADDRESS; 1410 return ResultInvalidAddress;
1404 } 1411 }
1405 1412
1406 if (!Common::Is4KBAligned(src_address)) { 1413 if (!Common::Is4KBAligned(src_address)) {
1407 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", 1414 LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
1408 src_address); 1415 src_address);
1409 return ERR_INVALID_ADDRESS; 1416 return ResultInvalidAddress;
1410 } 1417 }
1411 1418
1412 if (size == 0 || !Common::Is4KBAligned(size)) { 1419 if (size == 0 || !Common::Is4KBAligned(size)) {
1413 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); 1420 LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
1414 return ERR_INVALID_SIZE; 1421 return ResultInvalidSize;
1415 } 1422 }
1416 1423
1417 if (!IsValidAddressRange(dst_address, size)) { 1424 if (!IsValidAddressRange(dst_address, size)) {
@@ -1419,7 +1426,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1419 "Destination address range overflows the address space (dst_address=0x{:016X}, " 1426 "Destination address range overflows the address space (dst_address=0x{:016X}, "
1420 "size=0x{:016X}).", 1427 "size=0x{:016X}).",
1421 dst_address, size); 1428 dst_address, size);
1422 return ERR_INVALID_ADDRESS_STATE; 1429 return ResultInvalidCurrentMemory;
1423 } 1430 }
1424 1431
1425 if (!IsValidAddressRange(src_address, size)) { 1432 if (!IsValidAddressRange(src_address, size)) {
@@ -1427,7 +1434,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1427 "Source address range overflows the address space (src_address=0x{:016X}, " 1434 "Source address range overflows the address space (src_address=0x{:016X}, "
1428 "size=0x{:016X}).", 1435 "size=0x{:016X}).",
1429 src_address, size); 1436 src_address, size);
1430 return ERR_INVALID_ADDRESS_STATE; 1437 return ResultInvalidCurrentMemory;
1431 } 1438 }
1432 1439
1433 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 1440 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1435,7 +1442,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1435 if (!process) { 1442 if (!process) {
1436 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", 1443 LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
1437 process_handle); 1444 process_handle);
1438 return ERR_INVALID_HANDLE; 1445 return ResultInvalidHandle;
1439 } 1446 }
1440 1447
1441 auto& page_table = process->PageTable(); 1448 auto& page_table = process->PageTable();
@@ -1444,7 +1451,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1444 "Source address range is not within the address space (src_address=0x{:016X}, " 1451 "Source address range is not within the address space (src_address=0x{:016X}, "
1445 "size=0x{:016X}).", 1452 "size=0x{:016X}).",
1446 src_address, size); 1453 src_address, size);
1447 return ERR_INVALID_ADDRESS_STATE; 1454 return ResultInvalidCurrentMemory;
1448 } 1455 }
1449 1456
1450 if (!page_table.IsInsideASLRRegion(dst_address, size)) { 1457 if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1452,7 +1459,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
1452 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " 1459 "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
1453 "size=0x{:016X}).", 1460 "size=0x{:016X}).",
1454 dst_address, size); 1461 dst_address, size);
1455 return ERR_INVALID_MEMORY_RANGE; 1462 return ResultInvalidMemoryRange;
1456 } 1463 }
1457 1464
1458 return page_table.UnmapProcessCodeMemory(dst_address, src_address, size); 1465 return page_table.UnmapProcessCodeMemory(dst_address, src_address, size);
@@ -1515,8 +1522,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
1515 return ResultInvalidPriority; 1522 return ResultInvalidPriority;
1516 } 1523 }
1517 1524
1518 ASSERT(process.GetResourceLimit()->Reserve( 1525 KScopedResourceReservation thread_reservation(
1519 LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000)); 1526 kernel.CurrentProcess(), LimitableResource::Threads, 1,
1527 system.CoreTiming().GetGlobalTimeNs().count() + 100000000);
1528 if (!thread_reservation.Succeeded()) {
1529 LOG_ERROR(Kernel_SVC, "Could not reserve a new thread");
1530 return ResultResourceLimitedExceeded;
1531 }
1520 1532
1521 std::shared_ptr<KThread> thread; 1533 std::shared_ptr<KThread> thread;
1522 { 1534 {
@@ -1536,6 +1548,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
1536 // Set the thread name for debugging purposes. 1548 // Set the thread name for debugging purposes.
1537 thread->SetName( 1549 thread->SetName(
1538 fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); 1550 fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
1551 thread_reservation.Commit();
1539 1552
1540 return RESULT_SUCCESS; 1553 return RESULT_SUCCESS;
1541} 1554}
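CreateThread is the one call site above that reserves against a deadline: the fourth argument is the current global time plus 100ms, so the reservation may wait briefly for a thread slot to free up before reporting failure. A self-contained sketch of such a timed reserve-or-wait primitive, using std synchronization stand-ins rather than yuzu's kernel types:

    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    class TimedLimit {
    public:
        explicit TimedLimit(long long limit) : limit_{limit} {}

        // abs_deadline mirrors GetGlobalTimeNs().count() + 100000000 above.
        bool Reserve(long long value, std::chrono::steady_clock::time_point abs_deadline) {
            std::unique_lock lk{mutex_};
            // Wait until the units fit under the limit, or the deadline passes.
            if (!cv_.wait_until(lk, abs_deadline,
                                [&] { return current_ + value <= limit_; })) {
                return false;
            }
            current_ += value;
            return true;
        }

        void Release(long long value) {
            {
                std::scoped_lock lk{mutex_};
                current_ -= value;
            }
            cv_.notify_all(); // wake any Reserve() waiting for headroom
        }

    private:
        std::mutex mutex_;
        std::condition_variable cv_;
        long long current_ = 0;
        long long limit_;
    };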
@@ -1844,7 +1857,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
1844 1857
1845 LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle); 1858 LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle);
1846 1859
1847 return Svc::ResultInvalidHandle; 1860 return ResultInvalidHandle;
1848} 1861}
1849 1862
1850static ResultCode ResetSignal32(Core::System& system, Handle handle) { 1863static ResultCode ResetSignal32(Core::System& system, Handle handle) {
@@ -1860,18 +1873,18 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1860 1873
1861 if (!Common::Is4KBAligned(addr)) { 1874 if (!Common::Is4KBAligned(addr)) {
1862 LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr); 1875 LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
1863 return ERR_INVALID_ADDRESS; 1876 return ResultInvalidAddress;
1864 } 1877 }
1865 1878
1866 if (!Common::Is4KBAligned(size) || size == 0) { 1879 if (!Common::Is4KBAligned(size) || size == 0) {
1867 LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size); 1880 LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
1868 return ERR_INVALID_ADDRESS; 1881 return ResultInvalidAddress;
1869 } 1882 }
1870 1883
1871 if (!IsValidAddressRange(addr, size)) { 1884 if (!IsValidAddressRange(addr, size)) {
1872 LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})", 1885 LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
1873 addr, size); 1886 addr, size);
1874 return ERR_INVALID_ADDRESS_STATE; 1887 return ResultInvalidCurrentMemory;
1875 } 1888 }
1876 1889
1877 const auto perms{static_cast<Memory::MemoryPermission>(permissions)}; 1890 const auto perms{static_cast<Memory::MemoryPermission>(permissions)};
@@ -1879,10 +1892,17 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1879 perms == Memory::MemoryPermission::Write) { 1892 perms == Memory::MemoryPermission::Write) {
1880 LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})", 1893 LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
1881 permissions); 1894 permissions);
1882 return ERR_INVALID_MEMORY_PERMISSIONS; 1895 return ResultInvalidMemoryPermissions;
1883 } 1896 }
1884 1897
1885 auto& kernel = system.Kernel(); 1898 auto& kernel = system.Kernel();
1899 // Reserve a new transfer memory from the process resource limit.
1900 KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(),
1901 LimitableResource::TransferMemory);
1902 if (!trmem_reservation.Succeeded()) {
1903 LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory");
1904 return ResultResourceLimitedExceeded;
1905 }
1886 auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms); 1906 auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
1887 1907
1888 if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) { 1908 if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
@@ -1894,6 +1914,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
1894 if (result.Failed()) { 1914 if (result.Failed()) {
1895 return result.Code(); 1915 return result.Code();
1896 } 1916 }
1917 trmem_reservation.Commit();
1897 1918
1898 *handle = *result; 1919 *handle = *result;
1899 return RESULT_SUCCESS; 1920 return RESULT_SUCCESS;
@@ -1989,7 +2010,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
1989 LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw); 2010 LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw);
1990 return set_result; 2011 return set_result;
1991 } 2012 }
1992
1993 return RESULT_SUCCESS; 2013 return RESULT_SUCCESS;
1994} 2014}
1995 2015
@@ -2002,8 +2022,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle
2002static ResultCode SignalEvent(Core::System& system, Handle event_handle) { 2022static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
2003 LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle); 2023 LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle);
2004 2024
2025 auto& kernel = system.Kernel();
2005 // Get the current handle table. 2026 // Get the current handle table.
2006 const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 2027 const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
2028
2029 // Reserve a new event from the process resource limit.
2030 KScopedResourceReservation event_reservation(kernel.CurrentProcess(),
2031 LimitableResource::Events);
2032 if (!event_reservation.Succeeded()) {
2033 LOG_ERROR(Kernel, "Could not reserve a new event");
2034 return ResultResourceLimitedExceeded;
2035 }
2007 2036
2008 // Get the writable event. 2037 // Get the writable event.
2009 auto writable_event = handle_table.Get<KWritableEvent>(event_handle); 2038 auto writable_event = handle_table.Get<KWritableEvent>(event_handle);
@@ -2012,6 +2041,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
2012 return ResultInvalidHandle; 2041 return ResultInvalidHandle;
2013 } 2042 }
2014 2043
2044 // Commit the successful reservation.
2045 event_reservation.Commit();
2046
2015 return writable_event->Signal(); 2047 return writable_event->Signal();
2016} 2048}
2017 2049
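Note the ordering in the SignalEvent hunk above: the Events reservation is taken before the handle lookup and committed only after the writable event has been validated, so on the ResultInvalidHandle path the guard's destructor hands the reserved unit straight back to the limit.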
@@ -2043,7 +2075,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) {
2043 2075
2044 LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle); 2076 LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle);
2045 2077
2046 return Svc::ResultInvalidHandle; 2078 return ResultInvalidHandle;
2047} 2079}
2048 2080
2049static ResultCode ClearEvent32(Core::System& system, Handle event_handle) { 2081static ResultCode ClearEvent32(Core::System& system, Handle event_handle) {
@@ -2106,13 +2138,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
2106 if (!process) { 2138 if (!process) {
2107 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", 2139 LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
2108 process_handle); 2140 process_handle);
2109 return ERR_INVALID_HANDLE; 2141 return ResultInvalidHandle;
2110 } 2142 }
2111 2143
2112 const auto info_type = static_cast<InfoType>(type); 2144 const auto info_type = static_cast<InfoType>(type);
2113 if (info_type != InfoType::Status) { 2145 if (info_type != InfoType::Status) {
2114 LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type); 2146 LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type);
2115 return ERR_INVALID_ENUM_VALUE; 2147 return ResultInvalidEnumValue;
2116 } 2148 }
2117 2149
2118 *out = static_cast<u64>(process->GetStatus()); 2150 *out = static_cast<u64>(process->GetStatus());
@@ -2174,7 +2206,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
2174 const auto type = static_cast<LimitableResource>(resource_type); 2206 const auto type = static_cast<LimitableResource>(resource_type);
2175 if (!IsValidResourceType(type)) { 2207 if (!IsValidResourceType(type)) {
2176 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); 2208 LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
2177 return ERR_INVALID_ENUM_VALUE; 2209 return ResultInvalidEnumValue;
2178 } 2210 }
2179 2211
2180 auto* const current_process = system.Kernel().CurrentProcess(); 2212 auto* const current_process = system.Kernel().CurrentProcess();
@@ -2185,16 +2217,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
2185 if (!resource_limit_object) { 2217 if (!resource_limit_object) {
2186 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", 2218 LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
2187 resource_limit); 2219 resource_limit);
2188 return ERR_INVALID_HANDLE; 2220 return ResultInvalidHandle;
2189 } 2221 }
2190 2222
2191 const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); 2223 const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
2192 if (set_result.IsError()) { 2224 if (set_result.IsError()) {
2193 LOG_ERROR( 2225 LOG_ERROR(Kernel_SVC,
2194 Kernel_SVC, 2226 "Attempted to lower resource limit ({}) for category '{}' below its current "
2195 "Attempted to lower resource limit ({}) for category '{}' below its current value ({})", 2227 "value ({})",
2196 resource_limit_object->GetLimitValue(type), resource_type, 2228 resource_limit_object->GetLimitValue(type), resource_type,
2197 resource_limit_object->GetCurrentValue(type)); 2229 resource_limit_object->GetCurrentValue(type));
2198 return set_result; 2230 return set_result;
2199 } 2231 }
2200 2232
@@ -2211,7 +2243,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
2211 LOG_ERROR(Kernel_SVC, 2243 LOG_ERROR(Kernel_SVC,
2212 "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}", 2244 "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
2213 out_process_ids_size); 2245 out_process_ids_size);
2214 return ERR_OUT_OF_RANGE; 2246 return ResultOutOfRange;
2215 } 2247 }
2216 2248
2217 const auto& kernel = system.Kernel(); 2249 const auto& kernel = system.Kernel();
@@ -2221,7 +2253,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
2221 out_process_ids, total_copy_size)) { 2253 out_process_ids, total_copy_size)) {
2222 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", 2254 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2223 out_process_ids, out_process_ids + total_copy_size); 2255 out_process_ids, out_process_ids + total_copy_size);
2224 return ERR_INVALID_ADDRESS_STATE; 2256 return ResultInvalidCurrentMemory;
2225 } 2257 }
2226 2258
2227 auto& memory = system.Memory(); 2259 auto& memory = system.Memory();
@@ -2250,7 +2282,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
2250 if ((out_thread_ids_size & 0xF0000000) != 0) { 2282 if ((out_thread_ids_size & 0xF0000000) != 0) {
2251 LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}", 2283 LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
2252 out_thread_ids_size); 2284 out_thread_ids_size);
2253 return ERR_OUT_OF_RANGE; 2285 return ResultOutOfRange;
2254 } 2286 }
2255 2287
2256 const auto* const current_process = system.Kernel().CurrentProcess(); 2288 const auto* const current_process = system.Kernel().CurrentProcess();
@@ -2260,7 +2292,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
2260 !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) { 2292 !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) {
2261 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", 2293 LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
2262 out_thread_ids, out_thread_ids + total_copy_size); 2294 out_thread_ids, out_thread_ids + total_copy_size);
2263 return ERR_INVALID_ADDRESS_STATE; 2295 return ResultInvalidCurrentMemory;
2264 } 2296 }
2265 2297
2266 auto& memory = system.Memory(); 2298 auto& memory = system.Memory();
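The SignalEvent hunk above is the clearest instance of the reservation pattern this series introduces: reserve against the process resource limit up front, fail with ResultResourceLimitedExceeded when the reservation cannot be made, and Commit() only after every other check has passed, so the scoped object hands the resource back on any early error return. A minimal sketch of that idiom, assuming only the Succeeded()/Commit() surface visible in this diff (Limit and ScopedReservation are illustrative stand-ins, not yuzu's types):

#include <cstddef>

// Toy resource limit: tracks how many units of a resource are in use.
class Limit {
public:
    explicit Limit(std::size_t max) : max_{max} {}

    bool Reserve(std::size_t n) {
        if (used_ + n > max_) {
            return false;
        }
        used_ += n;
        return true;
    }

    void Release(std::size_t n) {
        used_ -= n;
    }

private:
    std::size_t max_;
    std::size_t used_ = 0;
};

// Scoped reservation: the destructor releases the units unless the
// caller reached the success path and called Commit().
class ScopedReservation {
public:
    ScopedReservation(Limit& limit, std::size_t n)
        : limit_{limit}, count_{n}, reserved_{limit.Reserve(n)} {}

    ~ScopedReservation() {
        if (reserved_ && !committed_) {
            limit_.Release(count_); // every early error return ends up here
        }
    }

    bool Succeeded() const {
        return reserved_;
    }

    void Commit() {
        committed_ = true; // success path: keep the units past destruction
    }

private:
    Limit& limit_;
    std::size_t count_;
    bool reserved_;
    bool committed_ = false;
};

Between construction and Commit(), every return path (such as the ResultInvalidHandle exit above) releases the reservation automatically; only the success path keeps it.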
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h
index 204cd989d..a26d9f2c9 100644
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -6,21 +6,36 @@
6 6
7#include "core/hle/result.h" 7#include "core/hle/result.h"
8 8
9namespace Kernel::Svc { 9namespace Kernel {
10 10
11// Confirmed Switch kernel error codes
12
13constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
14constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14};
11constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57}; 15constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57};
12constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59}; 16constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
17constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101};
13constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102}; 18constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};
14constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103}; 19constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103};
20constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104};
21constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105};
15constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; 22constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
23constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108};
24constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110};
16constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112}; 25constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112};
17constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113}; 26constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113};
18constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114}; 27constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
28constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115};
19constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116}; 29constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116};
20constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117}; 30constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
21constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118}; 31constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
32constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119};
22constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; 33constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120};
34constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121};
23constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122}; 35constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122};
36constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
24constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125}; 37constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
38constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126};
39constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132};
25 40
26} // namespace Kernel::Svc 41} // namespace Kernel
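Each constant above pairs an ErrorModule with a description number. Assuming the standard Switch result packing (module in the low 9 bits, description in the next 13, an assumption about the encoding rather than something this header shows), ResultInvalidHandle is the value games surface as the familiar 2001-0114 code:

#include <cstdint>
#include <cstdio>

// Assumed Switch-style packing: low 9 bits = module, next 13 = description.
constexpr std::uint32_t MakeResult(std::uint32_t module, std::uint32_t description) {
    return module | (description << 9);
}

int main() {
    constexpr std::uint32_t kernel_module = 1; // ErrorModule::Kernel (assumed value)
    const std::uint32_t invalid_handle = MakeResult(kernel_module, 114);

    // Recover the human-readable "2001-0114" form from the packed value.
    std::printf("%04u-%04u\n", 2000 + (invalid_handle & 0x1FF),
                (invalid_handle >> 9) & 0x1FFF);
    return 0;
}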
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 765f408c3..6b0fc1591 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/k_resource_limit.h"
5#include "core/hle/kernel/kernel.h" 6#include "core/hle/kernel/kernel.h"
6#include "core/hle/kernel/memory/page_table.h" 7#include "core/hle/kernel/memory/page_table.h"
7#include "core/hle/kernel/process.h" 8#include "core/hle/kernel/process.h"
@@ -17,6 +18,7 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory)
17TransferMemory::~TransferMemory() { 18TransferMemory::~TransferMemory() {
18 // Release memory region when transfer memory is destroyed 19 // Release memory region when transfer memory is destroyed
19 Reset(); 20 Reset();
21 owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1);
20} 22}
21 23
22std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, 24std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel,
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index bb77c2569..8e1fe9438 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1047,20 +1047,21 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
1047 1047
1048 const u64 offset{rp.Pop<u64>()}; 1048 const u64 offset{rp.Pop<u64>()};
1049 const std::vector<u8> data{ctx.ReadBuffer()}; 1049 const std::vector<u8> data{ctx.ReadBuffer()};
1050 const std::size_t size{std::min(data.size(), backing.GetSize() - offset)};
1050 1051
1051 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size()); 1052 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
1052 1053
1053 if (data.size() > backing.GetSize() - offset) { 1054 if (offset > backing.GetSize()) {
1054 LOG_ERROR(Service_AM, 1055 LOG_ERROR(Service_AM,
1055 "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}", 1056 "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
1056 backing.GetSize(), data.size(), offset); 1057 backing.GetSize(), size, offset);
1057 1058
1058 IPC::ResponseBuilder rb{ctx, 2}; 1059 IPC::ResponseBuilder rb{ctx, 2};
1059 rb.Push(ERR_SIZE_OUT_OF_BOUNDS); 1060 rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
1060 return; 1061 return;
1061 } 1062 }
1062 1063
1063 std::memcpy(backing.GetData().data() + offset, data.data(), data.size()); 1064 std::memcpy(backing.GetData().data() + offset, data.data(), size);
1064 1065
1065 IPC::ResponseBuilder rb{ctx, 2}; 1066 IPC::ResponseBuilder rb{ctx, 2};
1066 rb.Push(RESULT_SUCCESS); 1067 rb.Push(RESULT_SUCCESS);
@@ -1070,11 +1071,11 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
1070 IPC::RequestParser rp{ctx}; 1071 IPC::RequestParser rp{ctx};
1071 1072
1072 const u64 offset{rp.Pop<u64>()}; 1073 const u64 offset{rp.Pop<u64>()};
1073 const std::size_t size{ctx.GetWriteBufferSize()}; 1074 const std::size_t size{std::min(ctx.GetWriteBufferSize(), backing.GetSize() - offset)};
1074 1075
1075 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size); 1076 LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
1076 1077
1077 if (size > backing.GetSize() - offset) { 1078 if (offset > backing.GetSize()) {
1078 LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}", 1079 LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
1079 backing.GetSize(), size, offset); 1080 backing.GetSize(), size, offset);
1080 1081
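Both accessors above trade reject-on-overflow for clamp-to-capacity semantics: the transfer size becomes the smaller of the request and the space left after offset, and only an offset past the end of the backing storage still fails. Reduced to a standalone sketch (ClampedWrite is a hypothetical helper, not the service code):

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <vector>

// Clamp-to-capacity write: copies at most the bytes that fit after
// 'offset'. Only an offset beyond the end of 'backing' is an error;
// an oversized 'data' is silently truncated to the available space.
bool ClampedWrite(std::vector<unsigned char>& backing, std::size_t offset,
                  const std::vector<unsigned char>& data) {
    if (offset > backing.size()) {
        return false;
    }
    const std::size_t size = std::min(data.size(), backing.size() - offset);
    if (size != 0) {
        std::memcpy(backing.data() + offset, data.data(), size);
    }
    return true;
}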
diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp
index d7d3ee99a..c2bfe698f 100644
--- a/src/core/hle/service/am/applets/controller.cpp
+++ b/src/core/hle/service/am/applets/controller.cpp
@@ -211,7 +211,8 @@ void Controller::Execute() {
211 case ControllerSupportMode::ShowControllerFirmwareUpdate: 211 case ControllerSupportMode::ShowControllerFirmwareUpdate:
212 UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented", 212 UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
213 controller_private_arg.mode); 213 controller_private_arg.mode);
214 [[fallthrough]]; 214 ConfigurationComplete();
215 break;
215 default: { 216 default: {
216 ConfigurationComplete(); 217 ConfigurationComplete();
217 break; 218 break;
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 3022438b1..79b209c6b 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -121,6 +121,10 @@ void SoftwareKeyboard::ExecuteInteractive() {
121 std::memcpy(&request, data.data(), sizeof(Request)); 121 std::memcpy(&request, data.data(), sizeof(Request));
122 122
123 switch (request) { 123 switch (request) {
124 case Request::Finalize:
125 complete = true;
126 broker.SignalStateChanged();
127 break;
124 case Request::Calc: { 128 case Request::Calc: {
125 broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1})); 129 broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1}));
126 broker.SignalStateChanged(); 130 broker.SignalStateChanged();
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 51a010a55..1e2677320 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -110,6 +110,7 @@ void IAppletResource::DeactivateController(HidController controller) {
110 110
111IAppletResource::~IAppletResource() { 111IAppletResource::~IAppletResource() {
112 system.CoreTiming().UnscheduleEvent(pad_update_event, 0); 112 system.CoreTiming().UnscheduleEvent(pad_update_event, 0);
113 system.CoreTiming().UnscheduleEvent(motion_update_event, 0);
113} 114}
114 115
115void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { 116void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/ldn/errors.h b/src/core/hle/service/ldn/errors.h
new file mode 100644
index 000000000..a718c5c66
--- /dev/null
+++ b/src/core/hle/service/ldn/errors.h
@@ -0,0 +1,13 @@
1// Copyright 2021 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::LDN {
10
11constexpr ResultCode ERROR_DISABLED{ErrorModule::LDN, 22};
12
13} // namespace Service::LDN
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index ee908f399..c630d93cd 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -6,6 +6,7 @@
6 6
7#include "core/hle/ipc_helpers.h" 7#include "core/hle/ipc_helpers.h"
8#include "core/hle/result.h" 8#include "core/hle/result.h"
9#include "core/hle/service/ldn/errors.h"
9#include "core/hle/service/ldn/ldn.h" 10#include "core/hle/service/ldn/ldn.h"
10#include "core/hle/service/sm/sm.h" 11#include "core/hle/service/sm/sm.h"
11 12
@@ -103,7 +104,7 @@ public:
103 : ServiceFramework{system_, "IUserLocalCommunicationService"} { 104 : ServiceFramework{system_, "IUserLocalCommunicationService"} {
104 // clang-format off 105 // clang-format off
105 static const FunctionInfo functions[] = { 106 static const FunctionInfo functions[] = {
106 {0, nullptr, "GetState"}, 107 {0, &IUserLocalCommunicationService::GetState, "GetState"},
107 {1, nullptr, "GetNetworkInfo"}, 108 {1, nullptr, "GetNetworkInfo"},
108 {2, nullptr, "GetIpv4Address"}, 109 {2, nullptr, "GetIpv4Address"},
109 {3, nullptr, "GetDisconnectReason"}, 110 {3, nullptr, "GetDisconnectReason"},
@@ -138,13 +139,38 @@ public:
138 RegisterHandlers(functions); 139 RegisterHandlers(functions);
139 } 140 }
140 141
141 void Initialize2(Kernel::HLERequestContext& ctx) { 142 void GetState(Kernel::HLERequestContext& ctx) {
142 LOG_WARNING(Service_LDN, "(STUBBED) called"); 143 LOG_WARNING(Service_LDN, "(STUBBED) called");
143 // Result success seem make this services start network and continue. 144
144 // If we just pass result error then it will stop and maybe try again and again. 145 IPC::ResponseBuilder rb{ctx, 3};
146
147 rb.Push(RESULT_SUCCESS);
148
149 // Indicate a network error, as we do not actually emulate LDN
150 rb.Push(static_cast<u32>(State::Error));
151 }
152
153 void Initialize2(Kernel::HLERequestContext& ctx) {
154 LOG_DEBUG(Service_LDN, "called");
155
156 is_initialized = true;
157
145 IPC::ResponseBuilder rb{ctx, 2}; 158 IPC::ResponseBuilder rb{ctx, 2};
146 rb.Push(RESULT_UNKNOWN); 159 rb.Push(RESULT_SUCCESS);
147 } 160 }
161
162private:
163 enum class State {
164 None,
165 Initialized,
166 AccessPointOpened,
167 AccessPointCreated,
168 StationOpened,
169 StationConnected,
170 Error,
171 };
172
173 bool is_initialized{};
148}; 174};
149 175
150class LDNS final : public ServiceFramework<LDNS> { 176class LDNS final : public ServiceFramework<LDNS> {
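One convention worth spelling out for the GetState handler above: these HLE responses are built as a fixed number of 32-bit words that the guest reads back in push order, with the result code in the leading word before any payload (the OLSC and AM handlers elsewhere in this diff follow the same shape). A schematic stand-in for the builder, purely illustrative rather than IPC::ResponseBuilder's real interface:

#include <cstddef>
#include <cstdint>
#include <vector>

// Schematic response: words are emitted in push order, so the result
// code must be pushed before payload values such as the LDN state.
class Response {
public:
    explicit Response(std::size_t num_words) {
        words_.reserve(num_words);
    }

    void Push(std::uint32_t word) {
        words_.push_back(word);
    }

    const std::vector<std::uint32_t>& Words() const {
        return words_;
    }

private:
    std::vector<std::uint32_t> words_;
};

constexpr std::uint32_t RESULT_SUCCESS = 0;
constexpr std::uint32_t STATE_ERROR = 6; // State::Error's position in the enum above

void BuildGetState(Response& rb) {
    rb.Push(RESULT_SUCCESS); // word 0: result, read first by the guest
    rb.Push(STATE_ERROR);    // word 1: payload
}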
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9da786b4e..c724d2554 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -11,10 +11,10 @@
11#include "common/scope_exit.h" 11#include "common/scope_exit.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/hle/ipc_helpers.h" 13#include "core/hle/ipc_helpers.h"
14#include "core/hle/kernel/errors.h"
15#include "core/hle/kernel/memory/page_table.h" 14#include "core/hle/kernel/memory/page_table.h"
16#include "core/hle/kernel/memory/system_control.h" 15#include "core/hle/kernel/memory/system_control.h"
17#include "core/hle/kernel/process.h" 16#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/svc_results.h"
18#include "core/hle/service/ldr/ldr.h" 18#include "core/hle/service/ldr/ldr.h"
19#include "core/hle/service/service.h" 19#include "core/hle/service/service.h"
20#include "core/loader/nro.h" 20#include "core/loader/nro.h"
@@ -330,7 +330,7 @@ public:
330 const VAddr addr{GetRandomMapRegion(page_table, size)}; 330 const VAddr addr{GetRandomMapRegion(page_table, size)};
331 const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)}; 331 const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)};
332 332
333 if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { 333 if (result == Kernel::ResultInvalidCurrentMemory) {
334 continue; 334 continue;
335 } 335 }
336 336
@@ -361,7 +361,7 @@ public:
361 const ResultCode result{ 361 const ResultCode result{
362 page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)}; 362 page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)};
363 363
364 if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { 364 if (result == Kernel::ResultInvalidCurrentMemory) {
365 continue; 365 continue;
366 } 366 }
367 367
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 5d6d25696..2d1d4d67f 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -215,7 +215,7 @@ private:
215 const auto& amiibo = nfp_interface.GetAmiiboBuffer(); 215 const auto& amiibo = nfp_interface.GetAmiiboBuffer();
216 const TagInfo tag_info{ 216 const TagInfo tag_info{
217 .uuid = amiibo.uuid, 217 .uuid = amiibo.uuid,
218 .uuid_length = static_cast<u8>(tag_info.uuid.size()), 218 .uuid_length = static_cast<u8>(amiibo.uuid.size()),
219 .padding_1 = {}, 219 .padding_1 = {},
220 .protocol = 1, // TODO(ogniK): Figure out actual values 220 .protocol = 1, // TODO(ogniK): Figure out actual values
221 .tag_type = 2, 221 .tag_type = 2,
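The uuid_length fix above deserves a second look: the old initializer read tag_info.uuid.size() while tag_info was itself still being constructed, referencing the object inside its own designated initializer. It only happened to be harmless because std::array::size() never touches the (still uninitialized) elements. In miniature, with hypothetical types:

#include <array>
#include <cstdint>

struct TagInfo {
    std::array<std::uint8_t, 10> uuid;
    std::uint8_t uuid_length;
};

TagInfo MakeTagInfo(const std::array<std::uint8_t, 10>& amiibo_uuid) {
    // Buggy form (as before the fix): the initializer names the object
    // being created, reading from it mid-initialization:
    //
    //   const TagInfo tag_info{
    //       .uuid = amiibo_uuid,
    //       .uuid_length = static_cast<std::uint8_t>(tag_info.uuid.size()),
    //   };
    //
    // Fixed form: take the size from the source object instead.
    return TagInfo{
        .uuid = amiibo_uuid,
        .uuid_length = static_cast<std::uint8_t>(amiibo_uuid.size()),
    };
}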
diff --git a/src/core/hle/service/olsc/olsc.cpp b/src/core/hle/service/olsc/olsc.cpp
index 4440135ed..e2ac71fa1 100644
--- a/src/core/hle/service/olsc/olsc.cpp
+++ b/src/core/hle/service/olsc/olsc.cpp
@@ -17,7 +17,7 @@ public:
17 static const FunctionInfo functions[] = { 17 static const FunctionInfo functions[] = {
18 {0, &OLSC::Initialize, "Initialize"}, 18 {0, &OLSC::Initialize, "Initialize"},
19 {10, nullptr, "VerifySaveDataBackupLicenseAsync"}, 19 {10, nullptr, "VerifySaveDataBackupLicenseAsync"},
20 {13, nullptr, "GetSaveDataBackupSetting"}, 20 {13, &OLSC::GetSaveDataBackupSetting, "GetSaveDataBackupSetting"},
21 {14, &OLSC::SetSaveDataBackupSettingEnabled, "SetSaveDataBackupSettingEnabled"}, 21 {14, &OLSC::SetSaveDataBackupSettingEnabled, "SetSaveDataBackupSettingEnabled"},
22 {15, nullptr, "SetCustomData"}, 22 {15, nullptr, "SetCustomData"},
23 {16, nullptr, "DeleteSaveDataBackupSetting"}, 23 {16, nullptr, "DeleteSaveDataBackupSetting"},
@@ -52,6 +52,17 @@ private:
52 rb.Push(RESULT_SUCCESS); 52 rb.Push(RESULT_SUCCESS);
53 } 53 }
54 54
55 void GetSaveDataBackupSetting(Kernel::HLERequestContext& ctx) {
56 LOG_WARNING(Service_OLSC, "(STUBBED) called");
57
58 // backup_setting is set to 0 since the real value is unknown
59 constexpr u64 backup_setting = 0;
60
61 IPC::ResponseBuilder rb{ctx, 4};
62 rb.Push(RESULT_SUCCESS);
63 rb.Push(backup_setting);
64 }
65
55 void SetSaveDataBackupSettingEnabled(Kernel::HLERequestContext& ctx) { 66 void SetSaveDataBackupSettingEnabled(Kernel::HLERequestContext& ctx) {
56 LOG_WARNING(Service_OLSC, "(STUBBED) called"); 67 LOG_WARNING(Service_OLSC, "(STUBBED) called");
57 68
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 0b306b87a..78e9cd708 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -453,7 +453,8 @@ std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protoco
453 return {-1, Errno::MFILE}; 453 return {-1, Errno::MFILE};
454 } 454 }
455 455
456 FileDescriptor& descriptor = file_descriptors[fd].emplace(); 456 file_descriptors[fd] = FileDescriptor{};
457 FileDescriptor& descriptor = *file_descriptors[fd];
457 // ENOMEM might be thrown here 458 // ENOMEM might be thrown here
458 459
459 LOG_INFO(Service, "New socket fd={}", fd); 460 LOG_INFO(Service, "New socket fd={}", fd);
@@ -548,7 +549,8 @@ std::pair<s32, Errno> BSD::AcceptImpl(s32 fd, std::vector<u8>& write_buffer) {
548 return {-1, Translate(bsd_errno)}; 549 return {-1, Translate(bsd_errno)};
549 } 550 }
550 551
551 FileDescriptor& new_descriptor = file_descriptors[new_fd].emplace(); 552 file_descriptors[new_fd] = FileDescriptor{};
553 FileDescriptor& new_descriptor = *file_descriptors[new_fd];
552 new_descriptor.socket = std::move(result.socket); 554 new_descriptor.socket = std::move(result.socket);
553 new_descriptor.is_connection_based = descriptor.is_connection_based; 555 new_descriptor.is_connection_based = descriptor.is_connection_based;
554 556
diff --git a/src/core/settings.h b/src/core/settings.h
index a324530bd..d849dded3 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -181,12 +181,13 @@ struct Values {
181 std::string motion_device; 181 std::string motion_device;
182 std::string udp_input_servers; 182 std::string udp_input_servers;
183 183
184 bool emulate_analog_keyboard; 184 bool mouse_panning;
185 185 float mouse_panning_sensitivity;
186 bool mouse_enabled; 186 bool mouse_enabled;
187 std::string mouse_device; 187 std::string mouse_device;
188 MouseButtonsRaw mouse_buttons; 188 MouseButtonsRaw mouse_buttons;
189 189
190 bool emulate_analog_keyboard;
190 bool keyboard_enabled; 191 bool keyboard_enabled;
191 KeyboardKeysRaw keyboard_keys; 192 KeyboardKeysRaw keyboard_keys;
192 KeyboardModsRaw keyboard_mods; 193 KeyboardModsRaw keyboard_mods;
diff --git a/src/input_common/mouse/mouse_input.cpp b/src/input_common/mouse/mouse_input.cpp
index 10786a541..67a584d53 100644
--- a/src/input_common/mouse/mouse_input.cpp
+++ b/src/input_common/mouse/mouse_input.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2+ 2// Licensed under GPLv2+
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/settings.h"
5#include "input_common/mouse/mouse_input.h" 6#include "input_common/mouse/mouse_input.h"
6 7
7namespace MouseInput { 8namespace MouseInput {
@@ -36,6 +37,9 @@ void Mouse::UpdateThread() {
36 if (configuring) { 37 if (configuring) {
37 UpdateYuzuSettings(); 38 UpdateYuzuSettings();
38 } 39 }
40 if (mouse_panning_timeout++ > 8) {
41 StopPanning();
42 }
39 std::this_thread::sleep_for(std::chrono::milliseconds(update_time)); 43 std::this_thread::sleep_for(std::chrono::milliseconds(update_time));
40 } 44 }
41} 45}
@@ -65,8 +69,34 @@ void Mouse::PressButton(int x, int y, int button_) {
65 mouse_info[button_index].data.pressed = true; 69 mouse_info[button_index].data.pressed = true;
66} 70}
67 71
68void Mouse::MouseMove(int x, int y) { 72void Mouse::StopPanning() {
73 for (MouseInfo& info : mouse_info) {
74 if (Settings::values.mouse_panning) {
75 info.data.axis = {};
76 info.tilt_speed = 0;
77 info.last_mouse_change = {};
78 }
79 }
80}
81
82void Mouse::MouseMove(int x, int y, int center_x, int center_y) {
69 for (MouseInfo& info : mouse_info) { 83 for (MouseInfo& info : mouse_info) {
84 if (Settings::values.mouse_panning) {
85 const auto mouse_change = Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y);
86 mouse_panning_timeout = 0;
87
88 if (mouse_change.y == 0 && mouse_change.x == 0) {
89 continue;
90 }
91
92 info.last_mouse_change = (info.last_mouse_change * 0.8f) + (mouse_change * 0.2f);
93 info.data.axis = {static_cast<int>(16 * info.last_mouse_change.x),
94 static_cast<int>(16 * -info.last_mouse_change.y)};
95 info.tilt_direction = info.last_mouse_change;
96 info.tilt_speed = info.tilt_direction.Normalize() * info.sensitivity;
97 continue;
98 }
99
70 if (info.data.pressed) { 100 if (info.data.pressed) {
71 const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin; 101 const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin;
72 const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position; 102 const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position;
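The new panning path filters raw cursor deltas before they become axis values: each update keeps 80% of the accumulated motion and blends in 20% of the fresh sample, an exponential moving average that damps jitter while still following sustained movement, and the result is then scaled by 16 into the integer axis range. The filter in isolation (a sketch with a stand-in vector type rather than Common::Vec2):

#include <cstdio>

struct Vec2 {
    float x = 0.0f;
    float y = 0.0f;
};

// Exponential moving average over incoming mouse deltas: each update
// keeps 80% of the accumulated motion and blends in 20% of the new
// sample, so isolated spikes are damped but sustained motion tracks.
Vec2 SmoothDelta(Vec2 last, Vec2 sample) {
    return Vec2{
        last.x * 0.8f + sample.x * 0.2f,
        last.y * 0.8f + sample.y * 0.2f,
    };
}

int main() {
    Vec2 accum{};
    const Vec2 samples[] = {{10.0f, 0.0f}, {10.0f, 0.0f}, {0.0f, 0.0f}};
    for (const Vec2& s : samples) {
        accum = SmoothDelta(accum, s);
        std::printf("%.2f %.2f\n", accum.x, accum.y); // 2.00, 3.60, 2.88 ...
    }
    return 0;
}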
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h
index 58803c1bf..46aa676c1 100644
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -57,8 +57,10 @@ public:
57 * Signals that mouse has moved. 57 * Signals that mouse has moved.
58 * @param x the x-coordinate of the cursor 58 * @param x the x-coordinate of the cursor
59 * @param y the y-coordinate of the cursor 59 * @param y the y-coordinate of the cursor
60 * @param center_x the x-coordinate of the middle of the screen
61 * @param center_y the y-coordinate of the middle of the screen
60 */ 62 */
61 void MouseMove(int x, int y); 63 void MouseMove(int x, int y, int center_x, int center_y);
62 64
63 /** 65 /**
64 * Signals that a motion sensor tilt has ended. 66 * Signals that a motion sensor tilt has ended.
@@ -74,11 +76,13 @@ public:
74private: 76private:
75 void UpdateThread(); 77 void UpdateThread();
76 void UpdateYuzuSettings(); 78 void UpdateYuzuSettings();
79 void StopPanning();
77 80
78 struct MouseInfo { 81 struct MouseInfo {
79 InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f}; 82 InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f};
80 Common::Vec2<int> mouse_origin; 83 Common::Vec2<int> mouse_origin;
81 Common::Vec2<int> last_mouse_position; 84 Common::Vec2<int> last_mouse_position;
85 Common::Vec2<float> last_mouse_change;
82 bool is_tilting = false; 86 bool is_tilting = false;
83 float sensitivity{0.120f}; 87 float sensitivity{0.120f};
84 88
@@ -94,5 +98,6 @@ private:
94 Common::SPSCQueue<MouseStatus> mouse_queue; 98 Common::SPSCQueue<MouseStatus> mouse_queue;
95 bool configuring{false}; 99 bool configuring{false};
96 bool update_thread_running{true}; 100 bool update_thread_running{true};
101 int mouse_panning_timeout{};
97}; 102};
98} // namespace MouseInput 103} // namespace MouseInput
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index 3d799b293..bb56787ee 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -6,6 +6,7 @@
6#include <utility> 6#include <utility>
7 7
8#include "common/threadsafe_queue.h" 8#include "common/threadsafe_queue.h"
9#include "core/settings.h"
9#include "input_common/mouse/mouse_input.h" 10#include "input_common/mouse/mouse_input.h"
10#include "input_common/mouse/mouse_poller.h" 11#include "input_common/mouse/mouse_poller.h"
11 12
@@ -71,7 +72,7 @@ public:
71 std::lock_guard lock{mutex}; 72 std::lock_guard lock{mutex};
72 const auto axis_value = 73 const auto axis_value =
73 static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis)); 74 static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis));
74 return axis_value / (100.0f * range); 75 return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range);
75 } 76 }
76 77
77 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { 78 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index e7e50d789..c4afa4174 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -144,6 +144,10 @@ Client::~Client() {
144 Reset(); 144 Reset();
145} 145}
146 146
147Client::ClientData::ClientData() = default;
148
149Client::ClientData::~ClientData() = default;
150
147std::vector<Common::ParamPackage> Client::GetInputDevices() const { 151std::vector<Common::ParamPackage> Client::GetInputDevices() const {
148 std::vector<Common::ParamPackage> devices; 152 std::vector<Common::ParamPackage> devices;
149 for (std::size_t client = 0; client < clients.size(); client++) { 153 for (std::size_t client = 0; client < clients.size(); client++) {
diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h
index 822f9c550..a523f6124 100644
--- a/src/input_common/udp/client.h
+++ b/src/input_common/udp/client.h
@@ -98,6 +98,9 @@ public:
98 98
99private: 99private:
100 struct ClientData { 100 struct ClientData {
101 ClientData();
102 ~ClientData();
103
101 std::string host{"127.0.0.1"}; 104 std::string host{"127.0.0.1"};
102 u16 port{26760}; 105 u16 port{26760};
103 std::size_t pad_index{}; 106 std::size_t pad_index{};
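Declaring ClientData's constructor and destructor here while defining them as '= default' in client.cpp is the usual way to let a struct own members whose types are only forward-declared in the header: the compiler generates the special members in the one translation unit where those types are complete. Whether that is the exact motivation for this change is an assumption, but the idiom itself looks like this (hypothetical names):

// widget.h -- the header only sees a forward declaration of Impl.
#include <memory>

class Impl; // incomplete here

struct Widget {
    Widget();  // defined out of line, where Impl is complete
    ~Widget(); // ditto: destroying unique_ptr<Impl> needs the full type

    std::unique_ptr<Impl> impl;
};

// widget.cpp -- Impl is complete here, so the defaulted special
// members can instantiate unique_ptr's destructor.
class Impl {};

Widget::Widget() = default;
Widget::~Widget() = default;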
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index b630281a0..9829da6f0 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -84,8 +84,8 @@ public:
84 84
85private: 85private:
86 const std::string ip; 86 const std::string ip;
87 const u16 port; 87 [[maybe_unused]] const u16 port;
88 const u16 pad; 88 [[maybe_unused]] const u16 pad;
89 CemuhookUDP::Client* client; 89 CemuhookUDP::Client* client;
90 mutable std::mutex mutex; 90 mutable std::mutex mutex;
91}; 91};
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index 651633e9e..edced69bb 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); 471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); 472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
473} 473}
474
475TEST_CASE("BufferBase: Cached write") {
476 RasterizerInterface rasterizer;
477 BufferBase buffer(rasterizer, c, WORD);
478 buffer.UnmarkRegionAsCpuModified(c, WORD);
479 buffer.CachedCpuWrite(c + PAGE, PAGE);
480 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
481 buffer.FlushCachedWrites();
482 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
483 buffer.MarkRegionAsCpuModified(c, WORD);
484 REQUIRE(rasterizer.Count() == 0);
485}
486
487TEST_CASE("BufferBase: Multiple cached write") {
488 RasterizerInterface rasterizer;
489 BufferBase buffer(rasterizer, c, WORD);
490 buffer.UnmarkRegionAsCpuModified(c, WORD);
491 buffer.CachedCpuWrite(c + PAGE, PAGE);
492 buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
493 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
494 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
495 buffer.FlushCachedWrites();
496 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
497 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
498 buffer.MarkRegionAsCpuModified(c, WORD);
499 REQUIRE(rasterizer.Count() == 0);
500}
501
502TEST_CASE("BufferBase: Cached write unmarked") {
503 RasterizerInterface rasterizer;
504 BufferBase buffer(rasterizer, c, WORD);
505 buffer.UnmarkRegionAsCpuModified(c, WORD);
506 buffer.CachedCpuWrite(c + PAGE, PAGE);
507 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
508 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
509 buffer.FlushCachedWrites();
510 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
511 buffer.MarkRegionAsCpuModified(c, WORD);
512 REQUIRE(rasterizer.Count() == 0);
513}
514
515TEST_CASE("BufferBase: Cached write iterated") {
516 RasterizerInterface rasterizer;
517 BufferBase buffer(rasterizer, c, WORD);
518 buffer.UnmarkRegionAsCpuModified(c, WORD);
519 buffer.CachedCpuWrite(c + PAGE, PAGE);
520 int num = 0;
521 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
522 REQUIRE(num == 0);
523 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
524 buffer.FlushCachedWrites();
525 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
526 buffer.MarkRegionAsCpuModified(c, WORD);
527 REQUIRE(rasterizer.Count() == 0);
528}
529
530TEST_CASE("BufferBase: Cached write downloads") {
531 RasterizerInterface rasterizer;
532 BufferBase buffer(rasterizer, c, WORD);
533 buffer.UnmarkRegionAsCpuModified(c, WORD);
534 REQUIRE(rasterizer.Count() == 64);
535 buffer.CachedCpuWrite(c + PAGE, PAGE);
536 REQUIRE(rasterizer.Count() == 63);
537 buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
538 int num = 0;
539 buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
544 buffer.FlushCachedWrites();
545 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
547 buffer.MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549}
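Everything these tests exercise rests on one structure, which the buffer_base.h changes further down grow from two bitmaps to four: one bit per 4 KiB page, packed 64 pages to a u64 word, so marking a byte range means OR-ing a masked span of bits into each covered word. A standalone sketch of that marking step (the constants mirror the diff; MarkRange is an illustrative helper, not the cache's API):

#include <algorithm>
#include <cstdint>
#include <vector>

constexpr std::uint64_t BYTES_PER_PAGE = 4096;
constexpr std::uint64_t PAGES_PER_WORD = 64;

// Set the bits covering [offset, offset + size) in a page bitmap where
// each u64 word tracks 64 pages; partial words are masked at both ends.
void MarkRange(std::vector<std::uint64_t>& words, std::uint64_t offset, std::uint64_t size) {
    const std::uint64_t page_begin = offset / BYTES_PER_PAGE;
    const std::uint64_t page_end = (offset + size + BYTES_PER_PAGE - 1) / BYTES_PER_PAGE;
    std::uint64_t word_index = page_begin / PAGES_PER_WORD;
    std::uint64_t first_page = page_begin % PAGES_PER_WORD;
    std::uint64_t pages_left = page_end - page_begin;
    while (pages_left > 0) {
        const std::uint64_t count = std::min(PAGES_PER_WORD - first_page, pages_left);
        // Mask of 'count' ones starting at bit 'first_page' of this word.
        const std::uint64_t mask =
            (count == PAGES_PER_WORD ? ~std::uint64_t{0}
                                     : ((std::uint64_t{1} << count) - 1))
            << first_page;
        words[word_index] |= mask;
        pages_left -= count;
        first_page = 0;
        ++word_index;
    }
}

With the same 4 KiB pages and 64-page words as the tests above, MarkRange(words, 4096, 4096) sets exactly bit 1 of word 0.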
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2cf95937e..9b931976a 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,10 +2,8 @@ add_subdirectory(host_shaders)
2 2
3add_library(video_core STATIC 3add_library(video_core STATIC
4 buffer_cache/buffer_base.h 4 buffer_cache/buffer_base.h
5 buffer_cache/buffer_block.h 5 buffer_cache/buffer_cache.cpp
6 buffer_cache/buffer_cache.h 6 buffer_cache/buffer_cache.h
7 buffer_cache/map_interval.cpp
8 buffer_cache/map_interval.h
9 cdma_pusher.cpp 7 cdma_pusher.cpp
10 cdma_pusher.h 8 cdma_pusher.h
11 command_classes/codecs/codec.cpp 9 command_classes/codecs/codec.cpp
@@ -152,8 +150,6 @@ add_library(video_core STATIC
152 renderer_vulkan/vk_staging_buffer_pool.h 150 renderer_vulkan/vk_staging_buffer_pool.h
153 renderer_vulkan/vk_state_tracker.cpp 151 renderer_vulkan/vk_state_tracker.cpp
154 renderer_vulkan/vk_state_tracker.h 152 renderer_vulkan/vk_state_tracker.h
155 renderer_vulkan/vk_stream_buffer.cpp
156 renderer_vulkan/vk_stream_buffer.h
157 renderer_vulkan/vk_swapchain.cpp 153 renderer_vulkan/vk_swapchain.cpp
158 renderer_vulkan/vk_swapchain.h 154 renderer_vulkan/vk_swapchain.h
159 renderer_vulkan/vk_texture_cache.cpp 155 renderer_vulkan/vk_texture_cache.cpp
@@ -271,14 +267,13 @@ create_target_directory_groups(video_core)
271target_link_libraries(video_core PUBLIC common core) 267target_link_libraries(video_core PUBLIC common core)
272target_link_libraries(video_core PRIVATE glad xbyak) 268target_link_libraries(video_core PRIVATE glad xbyak)
273 269
274if (MSVC) 270if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
275 target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) 271 add_dependencies(video_core ffmpeg-build)
276 target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib)
277else()
278 target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
279 target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES})
280endif() 272endif()
281 273
274target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
275target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
276
282add_dependencies(video_core host_shaders) 277add_dependencies(video_core host_shaders)
283target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) 278target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
284target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) 279target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index ee8602ce9..0c00ae280 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {
19 19
20enum class BufferFlagBits { 20enum class BufferFlagBits {
21 Picked = 1 << 0, 21 Picked = 1 << 0,
22 CachedWrites = 1 << 1,
22}; 23};
23DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) 24DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
24 25
@@ -40,7 +41,7 @@ class BufferBase {
40 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; 41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
41 42
42 /// Vector tracking modified pages tightly packed with small vector optimization 43 /// Vector tracking modified pages tightly packed with small vector optimization
43 union WrittenWords { 44 union WordsArray {
44 /// Returns the pointer to the words state 45 /// Returns the pointer to the words state
45 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { 46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
46 return is_short ? &stack : heap; 47 return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
55 u64* heap; ///< Not-small buffers pointer to the storage 56 u64* heap; ///< Not-small buffers pointer to the storage
56 }; 57 };
57 58
58 struct GpuCpuWords { 59 struct Words {
59 explicit GpuCpuWords() = default; 60 explicit Words() = default;
60 explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { 61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
61 if (IsShort()) { 62 if (IsShort()) {
62 cpu.stack = ~u64{0}; 63 cpu.stack = ~u64{0};
63 gpu.stack = 0; 64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
64 } else { 67 } else {
65 // Share allocation between CPU and GPU pages and set their default values 68 // Share allocation between CPU and GPU pages and set their default values
66 const size_t num_words = NumWords(); 69 const size_t num_words = NumWords();
67 u64* const alloc = new u64[num_words * 2]; 70 u64* const alloc = new u64[num_words * 4];
68 cpu.heap = alloc; 71 cpu.heap = alloc;
69 gpu.heap = alloc + num_words; 72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
70 std::fill_n(cpu.heap, num_words, ~u64{0}); 75 std::fill_n(cpu.heap, num_words, ~u64{0});
71 std::fill_n(gpu.heap, num_words, 0); 76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
72 } 79 }
73 // Clean up trailing bits 80 // Clean up trailing bits
74 const u64 last_local_page = 81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
75 Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); 82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
76 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; 83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
77 u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; 84 const u64 last_word = (~u64{0} << shift) >> shift;
78 last_word = (last_word << shift) >> shift; 85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
79 } 87 }
80 88
81 ~GpuCpuWords() { 89 ~Words() {
82 Release(); 90 Release();
83 } 91 }
84 92
85 GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { 93 Words& operator=(Words&& rhs) noexcept {
86 Release(); 94 Release();
87 size_bytes = rhs.size_bytes; 95 size_bytes = rhs.size_bytes;
88 cpu = rhs.cpu; 96 cpu = rhs.cpu;
89 gpu = rhs.gpu; 97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
90 rhs.cpu.heap = nullptr; 100 rhs.cpu.heap = nullptr;
91 return *this; 101 return *this;
92 } 102 }
93 103
94 GpuCpuWords(GpuCpuWords&& rhs) noexcept 104 Words(Words&& rhs) noexcept
95 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { 105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
96 rhs.cpu.heap = nullptr; 107 rhs.cpu.heap = nullptr;
97 } 108 }
98 109
99 GpuCpuWords& operator=(const GpuCpuWords&) = delete; 110 Words& operator=(const Words&) = delete;
100 GpuCpuWords(const GpuCpuWords&) = delete; 111 Words(const Words&) = delete;
101 112
102 /// Returns true when the buffer fits in the small vector optimization 113 /// Returns true when the buffer fits in the small vector optimization
103 [[nodiscard]] bool IsShort() const noexcept { 114 [[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
118 } 129 }
119 130
120 u64 size_bytes = 0; 131 u64 size_bytes = 0;
121 WrittenWords cpu; 132 WordsArray cpu;
122 WrittenWords gpu; 133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
123 }; 143 };
124 144
125public: 145public:
@@ -132,68 +152,93 @@ public:
132 BufferBase& operator=(const BufferBase&) = delete; 152 BufferBase& operator=(const BufferBase&) = delete;
133 BufferBase(const BufferBase&) = delete; 153 BufferBase(const BufferBase&) = delete;
134 154
155 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default;
157
135 /// Returns the inclusive CPU modified range in a begin end pair 158 /// Returns the inclusive CPU modified range in a begin end pair
136 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, 159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
137 u64 query_size) const noexcept { 160 u64 query_size) const noexcept {
138 const u64 offset = query_cpu_addr - cpu_addr; 161 const u64 offset = query_cpu_addr - cpu_addr;
139 return ModifiedRegion<false>(offset, query_size); 162 return ModifiedRegion<Type::CPU>(offset, query_size);
140 } 163 }
141 164
142 /// Returns the inclusive GPU modified range in a begin end pair 165 /// Returns the inclusive GPU modified range in a begin end pair
143 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, 166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
144 u64 query_size) const noexcept { 167 u64 query_size) const noexcept {
145 const u64 offset = query_cpu_addr - cpu_addr; 168 const u64 offset = query_cpu_addr - cpu_addr;
146 return ModifiedRegion<true>(offset, query_size); 169 return ModifiedRegion<Type::GPU>(offset, query_size);
147 } 170 }
148 171
149 /// Returns true if a region has been modified from the CPU 172 /// Returns true if a region has been modified from the CPU
150 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
151 const u64 offset = query_cpu_addr - cpu_addr; 174 const u64 offset = query_cpu_addr - cpu_addr;
152 return IsRegionModified<false>(offset, query_size); 175 return IsRegionModified<Type::CPU>(offset, query_size);
153 } 176 }
154 177
155 /// Returns true if a region has been modified from the GPU 178 /// Returns true if a region has been modified from the GPU
156 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
157 const u64 offset = query_cpu_addr - cpu_addr; 180 const u64 offset = query_cpu_addr - cpu_addr;
158 return IsRegionModified<true>(offset, query_size); 181 return IsRegionModified<Type::GPU>(offset, query_size);
159 } 182 }
160 183
161 /// Mark region as CPU modified, notifying the rasterizer about this change 184 /// Mark region as CPU modified, notifying the rasterizer about this change
162 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
163 ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); 186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
164 } 187 }
165 188
166 /// Unmark region as CPU modified, notifying the rasterizer about this change 189 /// Unmark region as CPU modified, notifying the rasterizer about this change
167 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
168 ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); 191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
169 } 192 }
170 193
171 /// Mark region as modified from the host GPU 194 /// Mark region as modified from the host GPU
172 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
173 ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); 196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
174 } 197 }
175 198
176 /// Unmark region as modified from the host GPU 199 /// Unmark region as modified from the host GPU
177 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
178 ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); 201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU
205 /// but don't flag it as CPU-modified until FlushCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes, and notify the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 const u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 }
179 } 224 }
180 225
181 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified 226 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
182 template <typename Func> 227 template <typename Func>
183 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { 228 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
184 ForEachModifiedRange<false, true>(query_cpu_range, size, func); 229 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
185 } 230 }
186 231
187 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 232 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
188 template <typename Func> 233 template <typename Func>
189 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { 234 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
190 ForEachModifiedRange<true, false>(query_cpu_range, size, func); 235 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
191 } 236 }
192 237
193 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 238 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
194 template <typename Func> 239 template <typename Func>
195 void ForEachDownloadRange(Func&& func) { 240 void ForEachDownloadRange(Func&& func) {
196 ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); 241 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
197 } 242 }
198 243
199 /// Mark buffer as picked 244 /// Mark buffer as picked
@@ -206,6 +251,16 @@ public:
206 flags &= ~BufferFlagBits::Picked; 251 flags &= ~BufferFlagBits::Picked;
207 } 252 }
208 253
254 /// Increases the likelihood of this being a stream buffer
255 void IncreaseStreamScore(int score) noexcept {
256 stream_score += score;
257 }
258
259 /// Returns the likelihood of this being a stream buffer
260 [[nodiscard]] int StreamScore() const noexcept {
261 return stream_score;
262 }
263
209 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer 264 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
210 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { 265 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
211 return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); 266 return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -216,6 +271,11 @@ public:
216 return True(flags & BufferFlagBits::Picked); 271 return True(flags & BufferFlagBits::Picked);
217 } 272 }
218 273
274 /// Returns true when the buffer has pending cached writes
275 [[nodiscard]] bool HasCachedWrites() const noexcept {
276 return True(flags & BufferFlagBits::CachedWrites);
277 }
278
219 /// Returns the base CPU address of the buffer 279 /// Returns the base CPU address of the buffer
220 [[nodiscard]] VAddr CpuAddr() const noexcept { 280 [[nodiscard]] VAddr CpuAddr() const noexcept {
221 return cpu_addr; 281 return cpu_addr;
@@ -233,26 +293,48 @@ public:
233 } 293 }
234 294
235private: 295private:
296 template <Type type>
297 u64* Array() noexcept {
298 if constexpr (type == Type::CPU) {
299 return words.cpu.Pointer(IsShort());
300 } else if constexpr (type == Type::GPU) {
301 return words.gpu.Pointer(IsShort());
302 } else if constexpr (type == Type::CachedCPU) {
303 return words.cached_cpu.Pointer(IsShort());
304 } else if constexpr (type == Type::Untracked) {
305 return words.untracked.Pointer(IsShort());
306 }
307 }
308
309 template <Type type>
310 const u64* Array() const noexcept {
311 if constexpr (type == Type::CPU) {
312 return words.cpu.Pointer(IsShort());
313 } else if constexpr (type == Type::GPU) {
314 return words.gpu.Pointer(IsShort());
315 } else if constexpr (type == Type::CachedCPU) {
316 return words.cached_cpu.Pointer(IsShort());
317 } else if constexpr (type == Type::Untracked) {
318 return words.untracked.Pointer(IsShort());
319 }
320 }
321
236 /** 322 /**
237 * Change the state of a range of pages 323 * Change the state of a range of pages
238 * 324 *
239 * @param written_words Pages to be marked or unmarked as modified
240 * @param dirty_addr Base address to mark or unmark as modified 325 * @param dirty_addr Base address to mark or unmark as modified
241 * @param size Size in bytes to mark or unmark as modified 326 * @param size Size in bytes to mark or unmark as modified
242 *
243 * @tparam enable True when the bits will be set to one, false for zero
244 * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
245 */ 327 */
246 template <bool enable, bool notify_rasterizer> 328 template <Type type, bool enable>
247 void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, 329 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
248 s64 size) noexcept(!notify_rasterizer) {
249 const s64 difference = dirty_addr - cpu_addr; 330 const s64 difference = dirty_addr - cpu_addr;
250 const u64 offset = std::max<s64>(difference, 0); 331 const u64 offset = std::max<s64>(difference, 0);
251 size += std::min<s64>(difference, 0); 332 size += std::min<s64>(difference, 0);
252 if (offset >= SizeBytes() || size < 0) { 333 if (offset >= SizeBytes() || size < 0) {
253 return; 334 return;
254 } 335 }
255 u64* const state_words = written_words.Pointer(IsShort()); 336 u64* const untracked_words = Array<Type::Untracked>();
337 u64* const state_words = Array<type>();
256 const u64 offset_end = std::min(offset + size, SizeBytes()); 338 const u64 offset_end = std::min(offset + size, SizeBytes());
257 const u64 begin_page_index = offset / BYTES_PER_PAGE; 339 const u64 begin_page_index = offset / BYTES_PER_PAGE;
258 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; 340 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +350,19 @@ private:
268 u64 bits = ~u64{0}; 350 u64 bits = ~u64{0};
269 bits = (bits >> right_offset) << right_offset; 351 bits = (bits >> right_offset) << right_offset;
270 bits = (bits << left_offset) >> left_offset; 352 bits = (bits << left_offset) >> left_offset;
271 if constexpr (notify_rasterizer) { 353 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
272 NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); 354 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
273 } 355 }
274 if constexpr (enable) { 356 if constexpr (enable) {
275 state_words[word_index] |= bits; 357 state_words[word_index] |= bits;
358 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
359 untracked_words[word_index] |= bits;
360 }
276 } else { 361 } else {
277 state_words[word_index] &= ~bits; 362 state_words[word_index] &= ~bits;
363 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
364 untracked_words[word_index] &= ~bits;
365 }
278 } 366 }
279 page_index = 0; 367 page_index = 0;
280 ++word_index; 368 ++word_index;
@@ -291,7 +379,7 @@ private:
291 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 379 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
292 */ 380 */
293 template <bool add_to_rasterizer> 381 template <bool add_to_rasterizer>
294 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { 382 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
295 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 383 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
296 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 384 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
297 while (changed_bits != 0) { 385 while (changed_bits != 0) {
@@ -315,21 +403,20 @@ private:
315 * @param query_cpu_range Base CPU address to loop over 403 * @param query_cpu_range Base CPU address to loop over
316 * @param size Size in bytes of the CPU range to loop over 404 * @param size Size in bytes of the CPU range to loop over
317 * @param func Function to call for each turned off region 405 * @param func Function to call for each turned off region
318 *
319 * @tparam gpu True for host GPU pages, false for CPU pages
320 * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
321 */ 406 */
322 template <bool gpu, bool notify_rasterizer, typename Func> 407 template <Type type, typename Func>
323 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { 408 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
409 static_assert(type != Type::Untracked);
410
324 const s64 difference = query_cpu_range - cpu_addr; 411 const s64 difference = query_cpu_range - cpu_addr;
325 const u64 query_begin = std::max<s64>(difference, 0); 412 const u64 query_begin = std::max<s64>(difference, 0);
326 size += std::min<s64>(difference, 0); 413 size += std::min<s64>(difference, 0);
327 if (query_begin >= SizeBytes() || size < 0) { 414 if (query_begin >= SizeBytes() || size < 0) {
328 return; 415 return;
329 } 416 }
330 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 417 u64* const untracked_words = Array<Type::Untracked>();
418 u64* const state_words = Array<type>();
331 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); 419 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
332 u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
333 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; 420 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
334 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); 421 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
335 422
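Common::DivCeil rounds the word count up so that a partially covered trailing word is still visited; it is plain integer ceiling division. A one-function sketch of the idiom, assuming unsigned operands:

    #include <cstdint>

    // Integer ceiling division: DivCeil(10, 4) == 3, DivCeil(8, 4) == 2.
    // Assumes b != 0 and that a + b - 1 does not overflow.
    constexpr uint64_t DivCeil(uint64_t a, uint64_t b) {
        return (a + b - 1) / b;
    }
    static_assert(DivCeil(10, 4) == 3 && DivCeil(8, 4) == 2);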
@@ -345,7 +432,8 @@ private:
345 const u64 word_index_end = std::distance(state_words, last_modified_word); 432 const u64 word_index_end = std::distance(state_words, last_modified_word);
346 433
347 const unsigned local_page_begin = std::countr_zero(*first_modified_word); 434 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
348 const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); 435 const unsigned local_page_end =
436 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
349 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; 437 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
350 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; 438 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
351 const u64 query_page_begin = query_begin / BYTES_PER_PAGE; 439 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
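Here std::countr_zero on the first non-zero word yields the index of its lowest modified page, while PAGES_PER_WORD minus std::countl_zero on the last non-zero word yields one past its highest, bounding the dirty pages as a half-open range. A worked sketch with illustrative bit patterns:

    #include <bit>
    #include <cstdint>

    constexpr unsigned PAGES_PER_WORD = 64;

    constexpr uint64_t first_word = 0b0010'1000;  // pages 3 and 5 are dirty
    constexpr uint64_t last_word = 0b0000'0110;   // pages 1 and 2 are dirty

    // Lowest dirty page in the first word, one past the highest in the last.
    static_assert(std::countr_zero(first_word) == 3);
    static_assert(PAGES_PER_WORD - std::countl_zero(last_word) == 3);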
@@ -371,11 +459,13 @@ private:
371 const u64 current_word = state_words[word_index] & bits; 459 const u64 current_word = state_words[word_index] & bits;
372 state_words[word_index] &= ~bits; 460 state_words[word_index] &= ~bits;
373 461
374 // Exclude CPU modified pages when visiting GPU pages 462 if constexpr (type == Type::CPU) {
375 const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); 463 const u64 current_bits = untracked_words[word_index] & bits;
376 if constexpr (notify_rasterizer) { 464 untracked_words[word_index] &= ~bits;
377 NotifyRasterizer<true>(word_index, word, ~u64{0}); 465 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
378 } 466 }
467 // Exclude CPU modified pages when visiting GPU pages
468 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
379 u64 page = page_begin; 469 u64 page = page_begin;
380 page_begin = 0; 470 page_begin = 0;
381 471
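Masking the GPU word with the complement of the untracked word is what keeps a download from clobbering pages the CPU has written since. A worked example with illustrative bit patterns (not values from the patch):

    #include <cstdint>

    constexpr uint64_t gpu_modified = 0b1111'0000;  // pages the GPU wrote
    constexpr uint64_t untracked    = 0b1100'0000;  // pages the CPU wrote since

    // Only pages 4 and 5 are visited; pages 6 and 7 keep the CPU's newer data.
    static_assert((gpu_modified & ~untracked) == 0b0011'0000);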
@@ -416,17 +506,20 @@ private:
416 * @param offset Offset in bytes from the start of the buffer 506 * @param offset Offset in bytes from the start of the buffer
417 * @param size Size in bytes of the region to query for modifications 507 * @param size Size in bytes of the region to query for modifications
418 */ 508 */
419 template <bool gpu> 509 template <Type type>
420 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { 510 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
421 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 511 static_assert(type != Type::Untracked);
422 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 512
513 const u64* const untracked_words = Array<Type::Untracked>();
514 const u64* const state_words = Array<type>();
423 const u64 num_query_words = size / BYTES_PER_WORD + 1; 515 const u64 num_query_words = size / BYTES_PER_WORD + 1;
424 const u64 word_begin = offset / BYTES_PER_WORD; 516 const u64 word_begin = offset / BYTES_PER_WORD;
425 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 517 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
426 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); 518 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
427 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; 519 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
428 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { 520 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
429 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 521 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
522 const u64 word = state_words[word_index] & ~off_word;
430 if (word == 0) { 523 if (word == 0) {
431 continue; 524 continue;
432 } 525 }
@@ -445,13 +538,13 @@ private:
445 * 538 *
446 * @param offset Offset in bytes from the start of the buffer 539 * @param offset Offset in bytes from the start of the buffer
447 * @param size Size in bytes of the region to query for modifications 540 * @param size Size in bytes of the region to query for modifications
448 *
449 * @tparam gpu True to query GPU modified pages, false for CPU pages
450 */ 541 */
451 template <bool gpu> 542 template <Type type>
452 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { 543 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
453 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 544 static_assert(type != Type::Untracked);
454 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 545
546 const u64* const untracked_words = Array<Type::Untracked>();
547 const u64* const state_words = Array<type>();
455 const u64 num_query_words = size / BYTES_PER_WORD + 1; 548 const u64 num_query_words = size / BYTES_PER_WORD + 1;
456 const u64 word_begin = offset / BYTES_PER_WORD; 549 const u64 word_begin = offset / BYTES_PER_WORD;
457 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 550 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +553,8 @@ private:
460 u64 begin = std::numeric_limits<u64>::max(); 553 u64 begin = std::numeric_limits<u64>::max();
461 u64 end = 0; 554 u64 end = 0;
462 for (u64 word_index = word_begin; word_index < word_end; ++word_index) { 555 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
463 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 556 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
557 const u64 word = state_words[word_index] & ~off_word;
464 if (word == 0) { 558 if (word == 0) {
465 continue; 559 continue;
466 } 560 }
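ModifiedRegion folds the per-word bit scans into one half-open byte range by taking the minimum first dirty page and the maximum one-past-last dirty page over all scanned words. A sketch of the per-word update, assuming the 4 KiB tracker pages used by this file:

    #include <algorithm>
    #include <bit>
    #include <cstdint>

    constexpr uint64_t BYTES_PER_PAGE = 4096;  // assumed tracker granularity
    constexpr unsigned PAGES_PER_WORD = 64;

    // Grow the half-open byte range [begin, end) to cover the dirty pages in
    // `word`, where `word_base` is the byte offset of the word's first page.
    void GrowModifiedRange(uint64_t word, uint64_t word_base, uint64_t& begin, uint64_t& end) {
        if (word == 0) {
            return;  // nothing dirty in this word
        }
        const unsigned first_page = std::countr_zero(word);
        const unsigned end_page = PAGES_PER_WORD - std::countl_zero(word);  // one past
        begin = std::min(begin, word_base + first_page * BYTES_PER_PAGE);
        end = std::max(end, word_base + end_page * BYTES_PER_PAGE);
    }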
@@ -488,8 +582,9 @@ private:
488 582
489 RasterizerInterface* rasterizer = nullptr; 583 RasterizerInterface* rasterizer = nullptr;
490 VAddr cpu_addr = 0; 584 VAddr cpu_addr = 0;
491 GpuCpuWords words; 585 Words words;
492 BufferFlagBits flags{}; 586 BufferFlagBits flags{};
587 int stream_score = 0;
493}; 588};
494 589
495} // namespace VideoCommon 590} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
deleted file mode 100644
index e9306194a..000000000
--- a/src/video_core/buffer_cache/buffer_block.h
+++ /dev/null
@@ -1,62 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11class BufferBlock {
12public:
13 [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
14 return (cpu_addr < end) && (cpu_addr_end > start);
15 }
16
17 [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
18 return cpu_addr <= other_start && other_end <= cpu_addr_end;
19 }
20
21 [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
22 return static_cast<std::size_t>(in_addr - cpu_addr);
23 }
24
25 [[nodiscard]] VAddr CpuAddr() const {
26 return cpu_addr;
27 }
28
29 [[nodiscard]] VAddr CpuAddrEnd() const {
30 return cpu_addr_end;
31 }
32
33 void SetCpuAddr(VAddr new_addr) {
34 cpu_addr = new_addr;
35 cpu_addr_end = new_addr + size;
36 }
37
38 [[nodiscard]] std::size_t Size() const {
39 return size;
40 }
41
42 [[nodiscard]] u64 Epoch() const {
43 return epoch;
44 }
45
46 void SetEpoch(u64 new_epoch) {
47 epoch = new_epoch;
48 }
49
50protected:
51 explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
52 SetCpuAddr(cpu_addr_);
53 }
54
55private:
56 VAddr cpu_addr{};
57 VAddr cpu_addr_end{};
58 std::size_t size{};
59 u64 epoch{};
60};
61
62} // namespace VideoCommon
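The deleted block's Overlaps predicate is the standard half-open interval test, which the replacement cache keeps relying on when it resolves overlapping buffers. For reference, a minimal restatement:

    #include <cstdint>
    using VAddr = uint64_t;

    // Half-open ranges [a_begin, a_end) and [b_begin, b_end) overlap exactly
    // when each range starts before the other one ends.
    constexpr bool Overlaps(VAddr a_begin, VAddr a_end, VAddr b_begin, VAddr b_end) {
        return a_begin < b_end && b_begin < a_end;
    }
    static_assert(Overlaps(0x1000, 0x2000, 0x1800, 0x2800));
    static_assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000));  // touching, no overlap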
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
new file mode 100644
index 000000000..ab32294c8
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/microprofile.h"
6
7namespace VideoCommon {
8
9MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
10MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
11MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
12
13} // namespace VideoCommon
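Because BufferCache is a template, the profiler tokens cannot live in the header without being duplicated per instantiation; defining them once in this .cpp and declaring them where they are used keeps one shared entry. A sketch of the pattern, modelled on the macros above (GPU_Example is an illustrative token name):

    #include "common/microprofile.h"

    // In exactly one translation unit: define the token (group, label, colour).
    MICROPROFILE_DEFINE(GPU_Example, "GPU", "Example pass", MP_RGB(224, 128, 128));

    // In any other user, including template code: declare the shared token...
    MICROPROFILE_DECLARE(GPU_Example);

    void ExamplePass() {
        // ...and time everything until the end of the enclosing scope.
        MICROPROFILE_SCOPE(GPU_Example);
    }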
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83b9ee871..2a6844ab1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -4,591 +4,1289 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <list> 7#include <algorithm>
8#include <array>
9#include <deque>
8#include <memory> 10#include <memory>
9#include <mutex> 11#include <mutex>
12#include <span>
10#include <unordered_map> 13#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector> 14#include <vector>
14 15
15#include <boost/container/small_vector.hpp> 16#include <boost/container/small_vector.hpp>
16#include <boost/icl/interval_set.hpp>
17#include <boost/intrusive/set.hpp>
18 17
19#include "common/alignment.h"
20#include "common/assert.h"
21#include "common/common_types.h" 18#include "common/common_types.h"
22#include "common/logging/log.h" 19#include "common/div_ceil.h"
23#include "core/core.h" 20#include "common/microprofile.h"
21#include "common/scope_exit.h"
24#include "core/memory.h" 22#include "core/memory.h"
25#include "core/settings.h" 23#include "core/settings.h"
26#include "video_core/buffer_cache/buffer_block.h" 24#include "video_core/buffer_cache/buffer_base.h"
27#include "video_core/buffer_cache/map_interval.h" 25#include "video_core/delayed_destruction_ring.h"
26#include "video_core/dirty_flags.h"
27#include "video_core/engines/kepler_compute.h"
28#include "video_core/engines/maxwell_3d.h"
28#include "video_core/memory_manager.h" 29#include "video_core/memory_manager.h"
29#include "video_core/rasterizer_interface.h" 30#include "video_core/rasterizer_interface.h"
31#include "video_core/texture_cache/slot_vector.h"
32#include "video_core/texture_cache/types.h"
30 33
31namespace VideoCommon { 34namespace VideoCommon {
32 35
33template <typename Buffer, typename BufferType, typename StreamBuffer> 36MICROPROFILE_DECLARE(GPU_PrepareBuffers);
37MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
38MICROPROFILE_DECLARE(GPU_DownloadMemory);
39
40using BufferId = SlotId;
41
42constexpr u32 NUM_VERTEX_BUFFERS = 32;
43constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
44constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
45constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
46constexpr u32 NUM_STORAGE_BUFFERS = 16;
47constexpr u32 NUM_STAGES = 5;
48
49template <typename P>
34class BufferCache { 50class BufferCache {
35 using IntervalSet = boost::icl::interval_set<VAddr>; 51 // Page size for caching purposes.
 36 using IntervalType = typename IntervalSet::interval_type; 52 // This is unrelated to the CPU page size and can be tuned to whatever performs best.
37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; 53 static constexpr u32 PAGE_BITS = 16;
54 static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS;
55
56 static constexpr bool IS_OPENGL = P::IS_OPENGL;
57 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
58 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
59 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
60 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
61 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
62 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
63 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
64
65 static constexpr BufferId NULL_BUFFER_ID{0};
66
67 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
68
69 using Runtime = typename P::Runtime;
70 using Buffer = typename P::Buffer;
71
72 struct Empty {};
73
74 struct OverlapResult {
75 std::vector<BufferId> ids;
76 VAddr begin;
77 VAddr end;
78 bool has_stream_leap = false;
79 };
38 80
39 static constexpr u64 WRITE_PAGE_BIT = 11; 81 struct Binding {
40 static constexpr u64 BLOCK_PAGE_BITS = 21; 82 VAddr cpu_addr{};
41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; 83 u32 size{};
84 BufferId buffer_id;
85 };
42 86
43public: 87 static constexpr Binding NULL_BINDING{
44 struct BufferInfo { 88 .cpu_addr = 0,
45 BufferType handle; 89 .size = 0,
46 u64 offset; 90 .buffer_id = NULL_BUFFER_ID,
47 u64 address;
48 }; 91 };
49 92
50 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 93public:
51 bool is_written = false, bool use_fast_cbuf = false) { 94 static constexpr u32 SKIP_CACHE_SIZE = 4096;
52 std::lock_guard lock{mutex};
53 95
54 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 96 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
55 if (!cpu_addr) { 97 Tegra::Engines::Maxwell3D& maxwell3d_,
56 return GetEmptyBuffer(size); 98 Tegra::Engines::KeplerCompute& kepler_compute_,
57 } 99 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
100 Runtime& runtime_);
58 101
59 // Cache management is a big overhead, so only cache entries with a given size. 102 void TickFrame();
60 // TODO: Figure out which size is the best for given games.
61 constexpr std::size_t max_stream_size = 0x800;
62 if (use_fast_cbuf || size < max_stream_size) {
63 if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
64 const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
65 if (use_fast_cbuf) {
66 u8* dest;
67 if (is_granular) {
68 dest = gpu_memory.GetPointer(gpu_addr);
69 } else {
70 staging_buffer.resize(size);
71 dest = staging_buffer.data();
72 gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
73 }
74 return ConstBufferUpload(dest, size);
75 }
76 if (is_granular) {
77 u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
78 return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
79 std::memcpy(dest, host_ptr, size);
80 });
81 } else {
82 return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
83 gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
84 });
85 }
86 }
87 }
88 103
89 Buffer* const block = GetBlock(*cpu_addr, size); 104 void WriteMemory(VAddr cpu_addr, u64 size);
90 MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
91 if (!map) {
92 return GetEmptyBuffer(size);
93 }
94 if (is_written) {
95 map->MarkAsModified(true, GetModifiedTicks());
96 if (Settings::IsGPULevelHigh() &&
97 Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
98 MarkForAsyncFlush(map);
99 }
100 if (!map->is_written) {
101 map->is_written = true;
102 MarkRegionAsWritten(map->start, map->end - 1);
103 }
104 }
105 105
106 return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; 106 void CachedWriteMemory(VAddr cpu_addr, u64 size);
107 }
108 107
 109 /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset. 108 void DownloadMemory(VAddr cpu_addr, u64 size);
110 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
111 std::size_t alignment = 4) {
112 std::lock_guard lock{mutex};
113 return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
114 std::memcpy(dest, raw_pointer, size);
115 });
116 }
117 109
118 /// Prepares the buffer cache for data uploading 110 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
119 /// @param max_size Maximum number of bytes that will be uploaded
120 /// @return True when a stream buffer invalidation was required, false otherwise
121 void Map(std::size_t max_size) {
122 std::lock_guard lock{mutex};
123 111
124 std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); 112 void UpdateGraphicsBuffers(bool is_indexed);
125 buffer_offset = buffer_offset_base;
126 }
127 113
128 /// Finishes the upload stream 114 void UpdateComputeBuffers();
129 void Unmap() {
130 std::lock_guard lock{mutex};
131 stream_buffer.Unmap(buffer_offset - buffer_offset_base);
132 }
133 115
 134 /// Function called at the end of each frame, intended for deferred operations 116 void BindHostGeometryBuffers(bool is_indexed);
135 void TickFrame() {
136 ++epoch;
137 117
138 while (!pending_destruction.empty()) { 118 void BindHostStageBuffers(size_t stage);
139 // Delay at least 4 frames before destruction.
140 // This is due to triple buffering happening on some drivers.
141 static constexpr u64 epochs_to_destroy = 5;
142 if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
143 break;
144 }
145 pending_destruction.pop();
146 }
147 }
148 119
149 /// Write any cached resources overlapping the specified region back to memory 120 void BindHostComputeBuffers();
150 void FlushRegion(VAddr addr, std::size_t size) {
151 std::lock_guard lock{mutex};
152 121
153 VectorMapInterval objects = GetMapsInRange(addr, size); 122 void SetEnabledUniformBuffers(size_t stage, u32 enabled);
154 std::sort(objects.begin(), objects.end(),
155 [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
156 for (MapInterval* object : objects) {
157 if (object->is_modified && object->is_registered) {
158 mutex.unlock();
159 FlushMap(object);
160 mutex.lock();
161 }
162 }
163 }
164 123
165 bool MustFlushRegion(VAddr addr, std::size_t size) { 124 void SetEnabledComputeUniformBuffers(u32 enabled);
166 std::lock_guard lock{mutex};
167 125
168 const VectorMapInterval objects = GetMapsInRange(addr, size); 126 void UnbindGraphicsStorageBuffers(size_t stage);
169 return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
170 return map->is_modified && map->is_registered;
171 });
172 }
173 127
174 /// Mark the specified region as being invalidated 128 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
175 void InvalidateRegion(VAddr addr, u64 size) { 129 bool is_written);
176 std::lock_guard lock{mutex};
177 130
178 for (auto& object : GetMapsInRange(addr, size)) { 131 void UnbindComputeStorageBuffers();
179 if (object->is_registered) {
180 Unregister(object);
181 }
182 }
183 }
184 132
185 void OnCPUWrite(VAddr addr, std::size_t size) { 133 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
186 std::lock_guard lock{mutex}; 134 bool is_written);
187 135
188 for (MapInterval* object : GetMapsInRange(addr, size)) { 136 void FlushCachedWrites();
189 if (object->is_memory_marked && object->is_registered) {
190 UnmarkMemory(object);
191 object->is_sync_pending = true;
192 marked_for_unregister.emplace_back(object);
193 }
194 }
195 }
196 137
197 void SyncGuestHost() { 138 /// Return true when there are uncommitted buffers to be downloaded
198 std::lock_guard lock{mutex}; 139 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
199 140
200 for (auto& object : marked_for_unregister) { 141 /// Return true when the caller should wait for async downloads
201 if (object->is_registered) { 142 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
202 object->is_sync_pending = false; 143
203 Unregister(object); 144 /// Commit asynchronous downloads
204 } 145 void CommitAsyncFlushes();
146
147 /// Pop asynchronous downloads
148 void PopAsyncFlushes();
149
150 /// Return true when a CPU region is modified from the GPU
151 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
152
153 std::mutex mutex;
154
155private:
156 template <typename Func>
157 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
158 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
159 const int disabled_bits = std::countr_zero(enabled_mask);
160 index += disabled_bits;
161 enabled_mask >>= disabled_bits;
162 func(index);
205 } 163 }
206 marked_for_unregister.clear();
207 } 164 }
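ForEachEnabledBit skips disabled slots in whole runs via std::countr_zero instead of testing all 32 bits one by one. A usage sketch; the mask value and BindSlot are illustrative:

    // For mask 0b1000'1101 this calls func with index 0, 2, 3 and 7.
    ForEachEnabledBit(0b1000'1101u, [](u32 index) {
        BindSlot(index);  // hypothetical per-slot work on each enabled bit
    });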
208 165
209 void CommitAsyncFlushes() { 166 template <typename Func>
210 if (uncommitted_flushes) { 167 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
211 auto commit_list = std::make_shared<std::list<MapInterval*>>(); 168 const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE);
212 for (MapInterval* map : *uncommitted_flushes) { 169 for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) {
213 if (map->is_registered && map->is_modified) { 170 const BufferId buffer_id = page_table[page];
214 // TODO(Blinkhawk): Implement backend asynchronous flushing 171 if (!buffer_id) {
215 // AsyncFlushMap(map) 172 ++page;
216 commit_list->push_back(map); 173 continue;
217 }
218 }
219 if (!commit_list->empty()) {
220 committed_flushes.push_back(commit_list);
221 } else {
222 committed_flushes.emplace_back();
223 } 174 }
224 } else { 175 Buffer& buffer = slot_buffers[buffer_id];
225 committed_flushes.emplace_back(); 176 func(buffer_id, buffer);
177
178 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
179 page = Common::DivCeil(end_addr, PAGE_SIZE);
226 } 180 }
227 uncommitted_flushes.reset();
228 } 181 }
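page_table maps each 64 KiB page of the 39-bit guest address space to a BufferId, so the walk above costs one lookup per hole but jumps past a found buffer in one step regardless of its size. A standalone sketch of that skip, with DivCeil and the id type as assumptions:

    #include <cstdint>
    #include <vector>

    constexpr uint32_t PAGE_BITS = 16;  // 64 KiB cache pages
    constexpr uint64_t PAGE_SIZE = uint64_t{1} << PAGE_BITS;
    constexpr uint64_t DivCeil(uint64_t a, uint64_t b) { return (a + b - 1) / b; }

    // Visit every buffer mapped in [addr, addr + size); func returns the
    // buffer's end address so the loop leaps over its remaining pages at once.
    template <typename Func>
    void ForEachBufferId(const std::vector<uint32_t>& page_table, uint64_t addr,
                         uint64_t size, Func&& func) {
        const uint64_t page_end = DivCeil(addr + size, PAGE_SIZE);
        for (uint64_t page = addr >> PAGE_BITS; page < page_end;) {
            const uint32_t id = page_table[page];
            if (id == 0) {
                ++page;  // unmapped hole: advance one page
                continue;
            }
            const uint64_t buffer_end_addr = func(id);
            page = DivCeil(buffer_end_addr, PAGE_SIZE);  // skip the whole buffer
        }
    }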
229 182
230 bool ShouldWaitAsyncFlushes() const { 183 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
231 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 184 return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
185 ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
232 } 186 }
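A range is "granular" when it never crosses a guest CPU page boundary, so a single host pointer covers all of it. A worked sketch of the check, assuming 4 KiB CPU pages:

    #include <cstdint>

    constexpr uint64_t PAGE_MASK = 0xFFF;  // assumed 4 KiB guest CPU pages

    constexpr bool IsRangeGranular(uint64_t cpu_addr, uint64_t size) {
        // Same page number at both endpoints => no page boundary is crossed.
        return (cpu_addr & ~PAGE_MASK) == ((cpu_addr + size) & ~PAGE_MASK);
    }
    static_assert(IsRangeGranular(0x1000, 0x800));   // stays inside one page
    static_assert(!IsRangeGranular(0x1F00, 0x200));  // spills into the next page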
233 187
234 bool HasUncommittedFlushes() const { 188 void BindHostIndexBuffer();
235 return uncommitted_flushes != nullptr;
236 }
237 189
238 void PopAsyncFlushes() { 190 void BindHostVertexBuffers();
239 if (committed_flushes.empty()) {
240 return;
241 }
242 auto& flush_list = committed_flushes.front();
243 if (!flush_list) {
244 committed_flushes.pop_front();
245 return;
246 }
247 for (MapInterval* map : *flush_list) {
248 if (map->is_registered) {
249 // TODO(Blinkhawk): Replace this for reading the asynchronous flush
250 FlushMap(map);
251 }
252 }
253 committed_flushes.pop_front();
254 }
255 191
256 virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; 192 void BindHostGraphicsUniformBuffers(size_t stage);
257 193
258protected: 194 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
259 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
260 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
261 StreamBuffer& stream_buffer_)
262 : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
263 stream_buffer{stream_buffer_} {}
264 195
265 ~BufferCache() = default; 196 void BindHostGraphicsStorageBuffers(size_t stage);
266 197
267 virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; 198 void BindHostTransformFeedbackBuffers();
268 199
269 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { 200 void BindHostComputeUniformBuffers();
270 return {};
271 }
272 201
273 /// Register an object into the cache 202 void BindHostComputeStorageBuffers();
274 MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
275 const VAddr cpu_addr = new_map.start;
276 if (!cpu_addr) {
277 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
278 new_map.gpu_addr);
279 return nullptr;
280 }
281 const std::size_t size = new_map.end - new_map.start;
282 new_map.is_registered = true;
283 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
284 new_map.is_memory_marked = true;
285 if (inherit_written) {
286 MarkRegionAsWritten(new_map.start, new_map.end - 1);
287 new_map.is_written = true;
288 }
289 MapInterval* const storage = mapped_addresses_allocator.Allocate();
290 *storage = new_map;
291 mapped_addresses.insert(*storage);
292 return storage;
293 }
294 203
295 void UnmarkMemory(MapInterval* map) { 204 void DoUpdateGraphicsBuffers(bool is_indexed);
296 if (!map->is_memory_marked) { 205
297 return; 206 void DoUpdateComputeBuffers();
298 } 207
299 const std::size_t size = map->end - map->start; 208 void UpdateIndexBuffer();
300 rasterizer.UpdatePagesCachedCount(map->start, size, -1); 209
301 map->is_memory_marked = false; 210 void UpdateVertexBuffers();
302 } 211
303 212 void UpdateVertexBuffer(u32 index);
304 /// Unregisters an object from the cache 213
305 void Unregister(MapInterval* map) { 214 void UpdateUniformBuffers(size_t stage);
306 UnmarkMemory(map); 215
307 map->is_registered = false; 216 void UpdateStorageBuffers(size_t stage);
308 if (map->is_sync_pending) { 217
309 map->is_sync_pending = false; 218 void UpdateTransformFeedbackBuffers();
310 marked_for_unregister.remove(map); 219
220 void UpdateTransformFeedbackBuffer(u32 index);
221
222 void UpdateComputeUniformBuffers();
223
224 void UpdateComputeStorageBuffers();
225
226 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
227
228 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
229
230 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
231
232 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
233
234 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
235
236 void Register(BufferId buffer_id);
237
238 void Unregister(BufferId buffer_id);
239
240 template <bool insert>
241 void ChangeRegister(BufferId buffer_id);
242
243 void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
244
245 void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
246
247 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
248 std::span<BufferCopy> copies);
249
250 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
251 std::span<const BufferCopy> copies);
252
253 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
254
255 void DeleteBuffer(BufferId buffer_id);
256
257 void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
258
259 void NotifyBufferDeletion();
260
261 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
262
263 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
264
265 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
266
267 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
268
269 VideoCore::RasterizerInterface& rasterizer;
270 Tegra::Engines::Maxwell3D& maxwell3d;
271 Tegra::Engines::KeplerCompute& kepler_compute;
272 Tegra::MemoryManager& gpu_memory;
273 Core::Memory::Memory& cpu_memory;
274 Runtime& runtime;
275
276 SlotVector<Buffer> slot_buffers;
277 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
278
279 u32 last_index_count = 0;
280
281 Binding index_buffer;
282 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
283 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
284 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
285 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
286
287 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
288 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
289
290 std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
291 u32 enabled_compute_uniform_buffers = 0;
292
293 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
294 std::array<u32, NUM_STAGES> written_storage_buffers{};
295 u32 enabled_compute_storage_buffers = 0;
296 u32 written_compute_storage_buffers = 0;
297
298 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
299
300 bool has_deleted_buffers = false;
301
302 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
303 dirty_uniform_buffers{};
304
305 std::vector<BufferId> cached_write_buffer_ids;
306
307 // TODO: This data structure is not optimal and it should be reworked
308 std::vector<BufferId> uncommitted_downloads;
309 std::deque<std::vector<BufferId>> committed_downloads;
310
311 size_t immediate_buffer_capacity = 0;
312 std::unique_ptr<u8[]> immediate_buffer_alloc;
313
314 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
315};
316
317template <class P>
318BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
319 Tegra::Engines::Maxwell3D& maxwell3d_,
320 Tegra::Engines::KeplerCompute& kepler_compute_,
321 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
322 Runtime& runtime_)
323 : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
324 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
325 // Ensure the first slot is used for the null buffer
326 void(slot_buffers.insert(runtime, NullBufferParams{}));
327}
328
329template <class P>
330void BufferCache<P>::TickFrame() {
331 delayed_destruction_ring.Tick();
332}
333
334template <class P>
335void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
336 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
337 buffer.MarkRegionAsCpuModified(cpu_addr, size);
338 });
339}
340
341template <class P>
342void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
343 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
344 if (!buffer.HasCachedWrites()) {
345 cached_write_buffer_ids.push_back(buffer_id);
311 } 346 }
312 if (map->is_written) { 347 buffer.CachedCpuWrite(cpu_addr, size);
313 UnmarkRegionAsWritten(map->start, map->end - 1); 348 });
349}
350
351template <class P>
352void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
353 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
354 boost::container::small_vector<BufferCopy, 1> copies;
355 u64 total_size_bytes = 0;
356 u64 largest_copy = 0;
357 buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
358 copies.push_back(BufferCopy{
359 .src_offset = range_offset,
360 .dst_offset = total_size_bytes,
361 .size = range_size,
362 });
363 total_size_bytes += range_size;
364 largest_copy = std::max(largest_copy, range_size);
365 });
366 if (total_size_bytes == 0) {
367 return;
314 } 368 }
315 const auto it = mapped_addresses.find(*map); 369 MICROPROFILE_SCOPE(GPU_DownloadMemory);
316 ASSERT(it != mapped_addresses.end()); 370
317 mapped_addresses.erase(it); 371 if constexpr (USE_MEMORY_MAPS) {
318 mapped_addresses_allocator.Release(map); 372 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
319 } 373 const u8* const mapped_memory = download_staging.mapped_span.data();
320 374 const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
321private: 375 for (BufferCopy& copy : copies) {
 322 MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { 376 // Offset the copy destinations by the staging buffer offset
323 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); 377 copy.dst_offset += download_staging.offset;
324 if (overlaps.empty()) {
325 const VAddr cpu_addr_end = cpu_addr + size;
326 if (gpu_memory.IsGranularRange(gpu_addr, size)) {
327 u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
328 block->Upload(block->Offset(cpu_addr), size, host_ptr);
329 } else {
330 staging_buffer.resize(size);
331 gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
332 block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
333 } 378 }
334 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); 379 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
335 } 380 runtime.Finish();
336 381 for (const BufferCopy& copy : copies) {
337 const VAddr cpu_addr_end = cpu_addr + size; 382 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
338 if (overlaps.size() == 1) { 383 // Undo the modified offset
339 MapInterval* const current_map = overlaps[0]; 384 const u64 dst_offset = copy.dst_offset - download_staging.offset;
340 if (current_map->IsInside(cpu_addr, cpu_addr_end)) { 385 const u8* copy_mapped_memory = mapped_memory + dst_offset;
341 return current_map; 386 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
387 }
388 } else {
389 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
390 for (const BufferCopy& copy : copies) {
391 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
392 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
393 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
342 } 394 }
343 } 395 }
344 VAddr new_start = cpu_addr; 396 });
345 VAddr new_end = cpu_addr_end; 397}
346 bool write_inheritance = false; 398
347 bool modified_inheritance = false; 399template <class P>
348 // Calculate new buffer parameters 400void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
349 for (MapInterval* overlap : overlaps) { 401 u32 size) {
350 new_start = std::min(overlap->start, new_start); 402 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
351 new_end = std::max(overlap->end, new_end); 403 if (!cpu_addr) {
352 write_inheritance |= overlap->is_written; 404 uniform_buffers[stage][index] = NULL_BINDING;
353 modified_inheritance |= overlap->is_modified; 405 return;
406 }
407 const Binding binding{
408 .cpu_addr = *cpu_addr,
409 .size = size,
410 .buffer_id = BufferId{},
411 };
412 uniform_buffers[stage][index] = binding;
413}
414
415template <class P>
416void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
417 MICROPROFILE_SCOPE(GPU_PrepareBuffers);
418 do {
419 has_deleted_buffers = false;
420 DoUpdateGraphicsBuffers(is_indexed);
421 } while (has_deleted_buffers);
422}
423
424template <class P>
425void BufferCache<P>::UpdateComputeBuffers() {
426 MICROPROFILE_SCOPE(GPU_PrepareBuffers);
427 do {
428 has_deleted_buffers = false;
429 DoUpdateComputeBuffers();
430 } while (has_deleted_buffers);
431}
432
433template <class P>
434void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
435 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
436 if (is_indexed) {
437 BindHostIndexBuffer();
438 } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
439 const auto& regs = maxwell3d.regs;
440 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
441 runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
354 } 442 }
355 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; 443 }
356 for (auto& overlap : overlaps) { 444 BindHostVertexBuffers();
357 Unregister(overlap); 445 BindHostTransformFeedbackBuffers();
446}
447
448template <class P>
449void BufferCache<P>::BindHostStageBuffers(size_t stage) {
450 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
451 BindHostGraphicsUniformBuffers(stage);
452 BindHostGraphicsStorageBuffers(stage);
453}
454
455template <class P>
456void BufferCache<P>::BindHostComputeBuffers() {
457 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
458 BindHostComputeUniformBuffers();
459 BindHostComputeStorageBuffers();
460}
461
462template <class P>
463void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
464 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
465 if (enabled_uniform_buffers[stage] != enabled) {
466 dirty_uniform_buffers[stage] = ~u32{0};
358 } 467 }
359 UpdateBlock(block, new_start, new_end, overlaps); 468 }
360 469 enabled_uniform_buffers[stage] = enabled;
361 const MapInterval new_map{new_start, new_end, new_gpu_addr}; 470}
362 MapInterval* const map = Register(new_map, write_inheritance); 471
363 if (!map) { 472template <class P>
364 return nullptr; 473void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
474 enabled_compute_uniform_buffers = enabled;
475}
476
477template <class P>
478void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
479 enabled_storage_buffers[stage] = 0;
480 written_storage_buffers[stage] = 0;
481}
482
483template <class P>
484void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
485 u32 cbuf_offset, bool is_written) {
486 enabled_storage_buffers[stage] |= 1U << ssbo_index;
487 written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
488
489 const auto& cbufs = maxwell3d.state.shader_stages[stage];
490 const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
491 storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
492}
493
494template <class P>
495void BufferCache<P>::UnbindComputeStorageBuffers() {
496 enabled_compute_storage_buffers = 0;
497 written_compute_storage_buffers = 0;
498}
499
500template <class P>
501void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
502 bool is_written) {
503 enabled_compute_storage_buffers |= 1U << ssbo_index;
504 written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
505
506 const auto& launch_desc = kepler_compute.launch_description;
507 ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
508
509 const auto& cbufs = launch_desc.const_buffer_config;
510 const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
511 compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
512}
513
514template <class P>
515void BufferCache<P>::FlushCachedWrites() {
516 for (const BufferId buffer_id : cached_write_buffer_ids) {
517 slot_buffers[buffer_id].FlushCachedWrites();
518 }
519 cached_write_buffer_ids.clear();
520}
521
522template <class P>
523bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
524 return !uncommitted_downloads.empty();
525}
526
527template <class P>
528bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
529 return !committed_downloads.empty() && !committed_downloads.front().empty();
530}
531
532template <class P>
533void BufferCache<P>::CommitAsyncFlushes() {
534 // This is intentionally passing the value by copy
535 committed_downloads.push_front(uncommitted_downloads);
536 uncommitted_downloads.clear();
537}
538
539template <class P>
540void BufferCache<P>::PopAsyncFlushes() {
541 if (committed_downloads.empty()) {
542 return;
543 }
544 auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
545 const std::span<const BufferId> download_ids = committed_downloads.back();
546 if (download_ids.empty()) {
547 return;
548 }
549 MICROPROFILE_SCOPE(GPU_DownloadMemory);
550
551 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
552 u64 total_size_bytes = 0;
553 u64 largest_copy = 0;
554 for (const BufferId buffer_id : download_ids) {
555 slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
556 downloads.push_back({
557 BufferCopy{
558 .src_offset = range_offset,
559 .dst_offset = total_size_bytes,
560 .size = range_size,
561 },
562 buffer_id,
563 });
564 total_size_bytes += range_size;
565 largest_copy = std::max(largest_copy, range_size);
566 });
567 }
568 if (downloads.empty()) {
569 return;
570 }
571 if constexpr (USE_MEMORY_MAPS) {
572 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
573 for (auto& [copy, buffer_id] : downloads) {
 574 // Account for the staging buffer offset in the copy
575 copy.dst_offset += download_staging.offset;
576 const std::array copies{copy};
577 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
365 } 578 }
366 if (modified_inheritance) { 579 runtime.Finish();
367 map->MarkAsModified(true, GetModifiedTicks()); 580 for (const auto [copy, buffer_id] : downloads) {
368 if (Settings::IsGPULevelHigh() && 581 const Buffer& buffer = slot_buffers[buffer_id];
369 Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 582 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
370 MarkForAsyncFlush(map); 583 // Undo the modified offset
371 } 584 const u64 dst_offset = copy.dst_offset - download_staging.offset;
585 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
586 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
587 }
588 } else {
589 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
590 for (const auto [copy, buffer_id] : downloads) {
591 Buffer& buffer = slot_buffers[buffer_id];
592 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
593 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
594 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
372 } 595 }
373 return map;
374 } 596 }
375 597}
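Both download paths share the same gather pattern: every dirty range is packed back to back into one staging allocation (dst_offset is a running total), copied in a single batch, then written back to guest memory from its packed offset. A condensed sketch of the bookkeeping; the Copy struct mirrors BufferCopy but is only an illustration:

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Copy {
        uint64_t src_offset;  // offset inside the GPU buffer
        uint64_t dst_offset;  // packed offset inside the staging allocation
        uint64_t size;
    };

    // Pack (offset, size) ranges tightly; returns the copies plus the total
    // number of bytes the staging buffer must provide.
    std::pair<std::vector<Copy>, uint64_t> PackRanges(
        const std::vector<std::pair<uint64_t, uint64_t>>& ranges) {
        std::vector<Copy> copies;
        uint64_t total = 0;
        for (const auto& [offset, size] : ranges) {
            copies.push_back({offset, total, size});
            total += size;  // next range lands right after this one
        }
        return {std::move(copies), total};
    }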
376 void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { 598
377 const IntervalType base_interval{start, end}; 599template <class P>
378 IntervalSet interval_set{}; 600bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
379 interval_set.add(base_interval); 601 const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
380 for (auto& overlap : overlaps) { 602 for (u64 page = addr >> PAGE_BITS; page < page_end;) {
 381 const IntervalType subtract{overlap->start, overlap->end}; 603 const BufferId buffer_id = page_table[page];
 382 interval_set.subtract(subtract); 604 if (!buffer_id) {
605 ++page;
606 continue;
383 } 607 }
 384 for (auto& interval : interval_set) { 608 Buffer& buffer = slot_buffers[buffer_id];
385 const std::size_t size = interval.upper() - interval.lower(); 609 if (buffer.IsRegionGpuModified(addr, size)) {
386 if (size == 0) { 610 return true;
387 continue;
388 }
389 staging_buffer.resize(size);
390 cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
391 block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
392 } 611 }
612 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
613 page = Common::DivCeil(end_addr, PAGE_SIZE);
393 } 614 }
394 615 return false;
395 VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { 616}
396 VectorMapInterval result; 617
397 if (size == 0) { 618template <class P>
398 return result; 619void BufferCache<P>::BindHostIndexBuffer() {
620 Buffer& buffer = slot_buffers[index_buffer.buffer_id];
621 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
622 const u32 size = index_buffer.size;
623 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
624 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
625 runtime.BindIndexBuffer(buffer, offset, size);
626 } else {
627 runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
628 maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
629 buffer, offset, size);
630 }
631}
632
633template <class P>
634void BufferCache<P>::BindHostVertexBuffers() {
635 auto& flags = maxwell3d.dirty.flags;
636 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
637 const Binding& binding = vertex_buffers[index];
638 Buffer& buffer = slot_buffers[binding.buffer_id];
639 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
640 if (!flags[Dirty::VertexBuffer0 + index]) {
641 continue;
399 } 642 }
643 flags[Dirty::VertexBuffer0 + index] = false;
400 644
401 const VAddr addr_end = addr + size; 645 const u32 stride = maxwell3d.regs.vertex_array[index].stride;
402 auto it = mapped_addresses.lower_bound(addr); 646 const u32 offset = buffer.Offset(binding.cpu_addr);
403 if (it != mapped_addresses.begin()) { 647 runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
404 --it; 648 }
649}
650
651template <class P>
652void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
653 u32 dirty = ~0U;
654 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
655 dirty = std::exchange(dirty_uniform_buffers[stage], 0);
656 }
657 u32 binding_index = 0;
658 ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
659 const bool needs_bind = ((dirty >> index) & 1) != 0;
660 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
661 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
662 ++binding_index;
405 } 663 }
406 while (it != mapped_addresses.end() && it->start < addr_end) { 664 });
407 if (it->Overlaps(addr, addr_end)) { 665}
408 result.push_back(&*it); 666
667template <class P>
668void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
669 bool needs_bind) {
670 const Binding& binding = uniform_buffers[stage][index];
671 const VAddr cpu_addr = binding.cpu_addr;
672 const u32 size = binding.size;
673 Buffer& buffer = slot_buffers[binding.buffer_id];
674 if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
675 if constexpr (IS_OPENGL) {
676 if (runtime.HasFastBufferSubData()) {
677 // Fast path for Nvidia
678 if (!HasFastUniformBufferBound(stage, binding_index)) {
679 // We only have to bind when the currently bound buffer is not the fast version
680 runtime.BindFastUniformBuffer(stage, binding_index, size);
681 }
682 const auto span = ImmediateBufferWithData(cpu_addr, size);
683 runtime.PushFastUniformBuffer(stage, binding_index, span);
684 return;
409 } 685 }
410 ++it;
411 } 686 }
412 return result; 687 fast_bound_uniform_buffers[stage] |= 1U << binding_index;
413 }
414 688
415 /// Returns a ticks counter used for tracking when cached objects were last modified 689 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
416 u64 GetModifiedTicks() { 690 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
417 return ++modified_ticks; 691 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
692 return;
418 } 693 }
419 694 // Classic cached path
420 void FlushMap(MapInterval* map) { 695 SynchronizeBuffer(buffer, cpu_addr, size);
421 const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); 696 if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
422 ASSERT_OR_EXECUTE(it != blocks.end(), return;); 697 // Skip binding if it's not needed and if the bound buffer is not the fast version
423 698 // This exists to avoid instances where the fast buffer is bound and a GPU write happens
424 std::shared_ptr<Buffer> block = it->second; 699 return;
425
426 const std::size_t size = map->end - map->start;
427 staging_buffer.resize(size);
428 block->Download(block->Offset(map->start), size, staging_buffer.data());
429 cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
430 map->MarkAsModified(false, 0);
431 } 700 }
701 fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
432 702
433 template <typename Callable> 703 const u32 offset = buffer.Offset(cpu_addr);
434 BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { 704 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
435 AlignBuffer(alignment); 705 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
436 const std::size_t uploaded_offset = buffer_offset; 706 } else {
437 callable(buffer_ptr); 707 runtime.BindUniformBuffer(buffer, offset, size);
438
439 buffer_ptr += size;
440 buffer_offset += size;
441 return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
442 } 708 }
709}
710
711template <class P>
712void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
713 u32 binding_index = 0;
714 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
715 const Binding& binding = storage_buffers[stage][index];
716 Buffer& buffer = slot_buffers[binding.buffer_id];
717 const u32 size = binding.size;
718 SynchronizeBuffer(buffer, binding.cpu_addr, size);
719
720 const u32 offset = buffer.Offset(binding.cpu_addr);
721 const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
722 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
723 runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
724 ++binding_index;
725 } else {
726 runtime.BindStorageBuffer(buffer, offset, size, is_written);
727 }
728 });
729}
443 730
444 void AlignBuffer(std::size_t alignment) { 731template <class P>
445 // Align the offset, not the mapped pointer 732void BufferCache<P>::BindHostTransformFeedbackBuffers() {
446 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); 733 if (maxwell3d.regs.tfb_enabled == 0) {
447 buffer_ptr += offset_aligned - buffer_offset; 734 return;
448 buffer_offset = offset_aligned;
449 } 735 }
736 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
737 const Binding& binding = transform_feedback_buffers[index];
738 Buffer& buffer = slot_buffers[binding.buffer_id];
739 const u32 size = binding.size;
740 SynchronizeBuffer(buffer, binding.cpu_addr, size);
741
742 const u32 offset = buffer.Offset(binding.cpu_addr);
743 runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
744 }
745}
450 746
451 std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) { 747template <class P>
452 const std::size_t old_size = buffer->Size(); 748void BufferCache<P>::BindHostComputeUniformBuffers() {
453 const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; 749 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
454 const VAddr cpu_addr = buffer->CpuAddr(); 750 // Mark all uniform buffers as dirty
455 std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); 751 dirty_uniform_buffers.fill(~u32{0});
456 new_buffer->CopyFrom(*buffer, 0, 0, old_size); 752 }
457 QueueDestruction(std::move(buffer)); 753 u32 binding_index = 0;
458 754 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
459 const VAddr cpu_addr_end = cpu_addr + new_size - 1; 755 const Binding& binding = compute_uniform_buffers[index];
460 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; 756 Buffer& buffer = slot_buffers[binding.buffer_id];
461 for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { 757 const u32 size = binding.size;
462 blocks.insert_or_assign(page_start, new_buffer); 758 SynchronizeBuffer(buffer, binding.cpu_addr, size);
759
760 const u32 offset = buffer.Offset(binding.cpu_addr);
761 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
762 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
763 ++binding_index;
764 } else {
765 runtime.BindUniformBuffer(buffer, offset, size);
463 } 766 }
767 });
768}
769
770template <class P>
771void BufferCache<P>::BindHostComputeStorageBuffers() {
772 u32 binding_index = 0;
773 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
774 const Binding& binding = compute_storage_buffers[index];
775 Buffer& buffer = slot_buffers[binding.buffer_id];
776 const u32 size = binding.size;
777 SynchronizeBuffer(buffer, binding.cpu_addr, size);
778
779 const u32 offset = buffer.Offset(binding.cpu_addr);
780 const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
781 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
782 runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
783 ++binding_index;
784 } else {
785 runtime.BindStorageBuffer(buffer, offset, size, is_written);
786 }
787 });
788}
464 789
465 return new_buffer; 790template <class P>
791void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
792 if (is_indexed) {
793 UpdateIndexBuffer();
466 } 794 }
795 UpdateVertexBuffers();
796 UpdateTransformFeedbackBuffers();
797 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
798 UpdateUniformBuffers(stage);
799 UpdateStorageBuffers(stage);
800 }
801}
802
803template <class P>
804void BufferCache<P>::DoUpdateComputeBuffers() {
805 UpdateComputeUniformBuffers();
806 UpdateComputeStorageBuffers();
807}
808
809template <class P>
810void BufferCache<P>::UpdateIndexBuffer() {
811 // We have to check for the dirty flags and index count
812 // The index count is currently changed without updating the dirty flags
813 const auto& index_array = maxwell3d.regs.index_array;
814 auto& flags = maxwell3d.dirty.flags;
815 if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
816 return;
817 }
818 flags[Dirty::IndexBuffer] = false;
819 last_index_count = index_array.count;
820
821 const GPUVAddr gpu_addr_begin = index_array.StartAddress();
822 const GPUVAddr gpu_addr_end = index_array.EndAddress();
823 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
824 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
825 const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
826 const u32 size = std::min(address_size, draw_size);
827 if (size == 0 || !cpu_addr) {
828 index_buffer = NULL_BINDING;
829 return;
830 }
831 index_buffer = Binding{
832 .cpu_addr = *cpu_addr,
833 .size = size,
834 .buffer_id = FindBuffer(*cpu_addr, size),
835 };
836}
467
468     std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
469                                         std::shared_ptr<Buffer> second) {
470         const std::size_t size_1 = first->Size();
471         const std::size_t size_2 = second->Size();
472         const VAddr first_addr = first->CpuAddr();
473         const VAddr second_addr = second->CpuAddr();
474         const VAddr new_addr = std::min(first_addr, second_addr);
475         const std::size_t new_size = size_1 + size_2;
476
477         std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
478         new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
479         new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
480         QueueDestruction(std::move(first));
481         QueueDestruction(std::move(second));
482
483         const VAddr cpu_addr_end = new_addr + new_size - 1;
484         const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
485         for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
486             blocks.insert_or_assign(page_start, new_buffer);
487         }
488         return new_buffer;
489     }
490
491     Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
492         std::shared_ptr<Buffer> found;
493
494         const VAddr cpu_addr_end = cpu_addr + size - 1;
495         const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
496         for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
497             auto it = blocks.find(page_start);
498             if (it == blocks.end()) {
499                 if (found) {
500                     found = EnlargeBlock(found);
501                     continue;
502                 }
503                 const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
504                 found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
505                 blocks.insert_or_assign(page_start, found);
506                 continue;
507             }
508             if (!found) {
509                 found = it->second;
510                 continue;
511             }
512             if (found != it->second) {
513                 found = MergeBlocks(std::move(found), it->second);
514             }
515         }
516         return found.get();
517     }
518
519     void MarkRegionAsWritten(VAddr start, VAddr end) {
520         const u64 page_end = end >> WRITE_PAGE_BIT;
521         for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
522             if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
523                 ++it->second;
524             }
525         }
526     }
527
528     void UnmarkRegionAsWritten(VAddr start, VAddr end) {
529         const u64 page_end = end >> WRITE_PAGE_BIT;
530         for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
531             auto it = written_pages.find(page_start);
532             if (it != written_pages.end()) {
533                 if (it->second > 1) {
534                     --it->second;
535                 } else {
536                     written_pages.erase(it);
537                 }
538             }
539         }
540     }
541
542     bool IsRegionWritten(VAddr start, VAddr end) const {
543         const u64 page_end = end >> WRITE_PAGE_BIT;
544         for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
545             if (written_pages.contains(page_start)) {
546                 return true;
547             }
548         }
549         return false;
550     }
551
552     void QueueDestruction(std::shared_ptr<Buffer> buffer) {
553         buffer->SetEpoch(epoch);
554         pending_destruction.push(std::move(buffer));
555     }
556
557     void MarkForAsyncFlush(MapInterval* map) {
558         if (!uncommitted_flushes) {
559             uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
560         }
561         uncommitted_flushes->insert(map);
562     }
563
564     VideoCore::RasterizerInterface& rasterizer;
565     Tegra::MemoryManager& gpu_memory;
566     Core::Memory::Memory& cpu_memory;
567     StreamBuffer& stream_buffer;
568
569     u8* buffer_ptr = nullptr;
570     u64 buffer_offset = 0;
571     u64 buffer_offset_base = 0;
572
573     MapIntervalAllocator mapped_addresses_allocator;
574     boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
575         mapped_addresses;
576
577     std::unordered_map<u64, u32> written_pages;
578     std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
579
580     std::queue<std::shared_ptr<Buffer>> pending_destruction;
581     u64 epoch = 0;
582     u64 modified_ticks = 0;
583
584     std::vector<u8> staging_buffer;
585
586     std::list<MapInterval*> marked_for_unregister;
587
588     std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
589     std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
590
591     std::recursive_mutex mutex;
592 };
593
594 } // namespace VideoCommon

837
838 template <class P>
839 void BufferCache<P>::UpdateVertexBuffers() {
840     auto& flags = maxwell3d.dirty.flags;
841     if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
842         return;
843     }
844     flags[Dirty::VertexBuffers] = false;
845
846     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
847         UpdateVertexBuffer(index);
848     }
849 }
850
851 template <class P>
852 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
853     if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
854         return;
855     }
856     const auto& array = maxwell3d.regs.vertex_array[index];
857     const auto& limit = maxwell3d.regs.vertex_array_limit[index];
858     const GPUVAddr gpu_addr_begin = array.StartAddress();
859     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
860     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
861     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
862     const u32 size = address_size; // TODO: Analyze stride and number of vertices
863     if (array.enable == 0 || size == 0 || !cpu_addr) {
864         vertex_buffers[index] = NULL_BINDING;
865         return;
866     }
867     vertex_buffers[index] = Binding{
868         .cpu_addr = *cpu_addr,
869         .size = size,
870         .buffer_id = FindBuffer(*cpu_addr, size),
871     };
872 }
873
874 template <class P>
875 void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
876     ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
877         Binding& binding = uniform_buffers[stage][index];
878         if (binding.buffer_id) {
879             // Already updated
880             return;
881         }
882         // Mark as dirty
883         if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
884             dirty_uniform_buffers[stage] |= 1U << index;
885         }
886         // Resolve buffer
887         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
888     });
889 }
890
891 template <class P>
892 void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
893     const u32 written_mask = written_storage_buffers[stage];
894     ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
895         // Resolve buffer
896         Binding& binding = storage_buffers[stage][index];
897         const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
898         binding.buffer_id = buffer_id;
899         // Mark buffer as written if needed
900         if (((written_mask >> index) & 1) != 0) {
901             MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
902         }
903     });
904 }
905
906 template <class P>
907 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
908     if (maxwell3d.regs.tfb_enabled == 0) {
909         return;
910     }
911     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
912         UpdateTransformFeedbackBuffer(index);
913     }
914 }
915
916 template <class P>
917 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
918     const auto& binding = maxwell3d.regs.tfb_bindings[index];
919     const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
920     const u32 size = binding.buffer_size;
921     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
922     if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
923         transform_feedback_buffers[index] = NULL_BINDING;
924         return;
925     }
926     const BufferId buffer_id = FindBuffer(*cpu_addr, size);
927     transform_feedback_buffers[index] = Binding{
928         .cpu_addr = *cpu_addr,
929         .size = size,
930         .buffer_id = buffer_id,
931     };
932     MarkWrittenBuffer(buffer_id, *cpu_addr, size);
933 }
934
935 template <class P>
936 void BufferCache<P>::UpdateComputeUniformBuffers() {
937     ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
938         Binding& binding = compute_uniform_buffers[index];
939         binding = NULL_BINDING;
940         const auto& launch_desc = kepler_compute.launch_description;
941         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
942             const auto& cbuf = launch_desc.const_buffer_config[index];
943             const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
944             if (cpu_addr) {
945                 binding.cpu_addr = *cpu_addr;
946                 binding.size = cbuf.size;
947             }
948         }
949         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
950     });
951 }
952
953 template <class P>
954 void BufferCache<P>::UpdateComputeStorageBuffers() {
955     ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
956         // Resolve buffer
957         Binding& binding = compute_storage_buffers[index];
958         const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
959         binding.buffer_id = buffer_id;
960         // Mark as written if needed
961         if (((written_compute_storage_buffers >> index) & 1) != 0) {
962             MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
963         }
964     });
965 }
966
967 template <class P>
968 void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
969     Buffer& buffer = slot_buffers[buffer_id];
970     buffer.MarkRegionAsGpuModified(cpu_addr, size);
971
972     const bool is_accuracy_high = Settings::IsGPULevelHigh();
973     const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
974     if (!is_accuracy_high || !is_async) {
975         return;
976     }
977     if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
978         // Already inserted
979         return;
980     }
981     uncommitted_downloads.push_back(buffer_id);
982 }
983
984 template <class P>
985 BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
986     if (cpu_addr == 0) {
987         return NULL_BUFFER_ID;
988     }
989     const u64 page = cpu_addr >> PAGE_BITS;
990     const BufferId buffer_id = page_table[page];
991     if (!buffer_id) {
992         return CreateBuffer(cpu_addr, size);
993     }
994     const Buffer& buffer = slot_buffers[buffer_id];
995     if (buffer.IsInBounds(cpu_addr, size)) {
996         return buffer_id;
997     }
998     return CreateBuffer(cpu_addr, size);
999 }
1000
1001 template <class P>
1002 typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
1003                                                                        u32 wanted_size) {
1004     static constexpr int STREAM_LEAP_THRESHOLD = 16;
1005     std::vector<BufferId> overlap_ids;
1006     VAddr begin = cpu_addr;
1007     VAddr end = cpu_addr + wanted_size;
1008     int stream_score = 0;
1009     bool has_stream_leap = false;
1010     for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
1011         const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
1012         if (!overlap_id) {
1013             continue;
1014         }
1015         Buffer& overlap = slot_buffers[overlap_id];
1016         if (overlap.IsPicked()) {
1017             continue;
1018         }
1019         overlap_ids.push_back(overlap_id);
1020         overlap.Pick();
1021         const VAddr overlap_cpu_addr = overlap.CpuAddr();
1022         if (overlap_cpu_addr < begin) {
1023             cpu_addr = begin = overlap_cpu_addr;
1024         }
1025         end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
1026
1027         stream_score += overlap.StreamScore();
1028         if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
1029             // When this memory region has been joined a bunch of times, we assume it's being used
1030             // as a stream buffer. Increase the size to skip constantly recreating buffers.
1031             has_stream_leap = true;
1032             end += PAGE_SIZE * 256;
1033         }
1034     }
1035     return OverlapResult{
1036         .ids = std::move(overlap_ids),
1037         .begin = begin,
1038         .end = end,
1039         .has_stream_leap = has_stream_leap,
1040     };
1041 }
1042
1043 template <class P>
1044 void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1045                                  bool accumulate_stream_score) {
1046     Buffer& new_buffer = slot_buffers[new_buffer_id];
1047     Buffer& overlap = slot_buffers[overlap_id];
1048     if (accumulate_stream_score) {
1049         new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
1050     }
1051     std::vector<BufferCopy> copies;
1052     const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
1053     overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
1054         copies.push_back(BufferCopy{
1055             .src_offset = begin,
1056             .dst_offset = dst_base_offset + begin,
1057             .size = range_size,
1058         });
1059         new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
1060         new_buffer.MarkRegionAsGpuModified(begin, range_size);
1061     });
1062     if (!copies.empty()) {
1063         runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
1064     }
1065     ReplaceBufferDownloads(overlap_id, new_buffer_id);
1066     DeleteBuffer(overlap_id);
1067 }
1068
1069 template <class P>
1070 BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1071     const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1072     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1073     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
1074     for (const BufferId overlap_id : overlap.ids) {
1075         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1076     }
1077     Register(new_buffer_id);
1078     return new_buffer_id;
1079 }
1080
1081 template <class P>
1082 void BufferCache<P>::Register(BufferId buffer_id) {
1083     ChangeRegister<true>(buffer_id);
1084 }
1085
1086 template <class P>
1087 void BufferCache<P>::Unregister(BufferId buffer_id) {
1088     ChangeRegister<false>(buffer_id);
1089 }
1090
1091 template <class P>
1092 template <bool insert>
1093 void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1094     const Buffer& buffer = slot_buffers[buffer_id];
1095     const VAddr cpu_addr_begin = buffer.CpuAddr();
1096     const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
1097     const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
1098     const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
1099     for (u64 page = page_begin; page != page_end; ++page) {
1100         if constexpr (insert) {
1101             page_table[page] = buffer_id;
1102         } else {
1103             page_table[page] = BufferId{};
1104         }
1105     }
1106 }
1107
1108 template <class P>
1109 void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
1110     if (buffer.CpuAddr() == 0) {
1111         return;
1112     }
1113     SynchronizeBufferImpl(buffer, cpu_addr, size);
1114 }
1115
1116 template <class P>
1117 void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
1118     boost::container::small_vector<BufferCopy, 4> copies;
1119     u64 total_size_bytes = 0;
1120     u64 largest_copy = 0;
1121     buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
1122         copies.push_back(BufferCopy{
1123             .src_offset = total_size_bytes,
1124             .dst_offset = range_offset,
1125             .size = range_size,
1126         });
1127         total_size_bytes += range_size;
1128         largest_copy = std::max(largest_copy, range_size);
1129     });
1130     if (total_size_bytes == 0) {
1131         return;
1132     }
1133     const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1134     UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1135 }
1136
1137 template <class P>
1138 void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1139                                   std::span<BufferCopy> copies) {
1140     if constexpr (USE_MEMORY_MAPS) {
1141         MappedUploadMemory(buffer, total_size_bytes, copies);
1142     } else {
1143         ImmediateUploadMemory(buffer, largest_copy, copies);
1144     }
1145 }
1146
1147 template <class P>
1148 void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
1149                                            std::span<const BufferCopy> copies) {
1150     std::span<u8> immediate_buffer;
1151     for (const BufferCopy& copy : copies) {
1152         std::span<const u8> upload_span;
1153         const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1154         if (IsRangeGranular(cpu_addr, copy.size)) {
1155             upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
1156         } else {
1157             if (immediate_buffer.empty()) {
1158                 immediate_buffer = ImmediateBuffer(largest_copy);
1159             }
1160             cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
1161             upload_span = immediate_buffer.subspan(0, copy.size);
1162         }
1163         buffer.ImmediateUpload(copy.dst_offset, upload_span);
1164     }
1165 }
1166
1167 template <class P>
1168 void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
1169                                         std::span<BufferCopy> copies) {
1170     auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
1171     const std::span<u8> staging_pointer = upload_staging.mapped_span;
1172     for (BufferCopy& copy : copies) {
1173         u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1174         const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1175         cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
1176
1177         // Apply the staging offset
1178         copy.src_offset += upload_staging.offset;
1179     }
1180     runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1181 }
1182
1183 template <class P>
1184 void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
1185     const auto scalar_replace = [buffer_id](Binding& binding) {
1186         if (binding.buffer_id == buffer_id) {
1187             binding.buffer_id = BufferId{};
1188         }
1189     };
1190     const auto replace = [scalar_replace](std::span<Binding> bindings) {
1191         std::ranges::for_each(bindings, scalar_replace);
1192     };
1193     scalar_replace(index_buffer);
1194     replace(vertex_buffers);
1195     std::ranges::for_each(uniform_buffers, replace);
1196     std::ranges::for_each(storage_buffers, replace);
1197     replace(transform_feedback_buffers);
1198     replace(compute_uniform_buffers);
1199     replace(compute_storage_buffers);
1200     std::erase(cached_write_buffer_ids, buffer_id);
1201
1202     // Mark the whole buffer as CPU written to stop tracking CPU writes
1203     Buffer& buffer = slot_buffers[buffer_id];
1204     buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
1205
1206     Unregister(buffer_id);
1207     delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
1208
1209     NotifyBufferDeletion();
1210 }
1211
1212 template <class P>
1213 void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
1214     const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
1215         std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
1216         if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
1217             buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
1218         }
1219     };
1220     replace(uncommitted_downloads);
1221     std::ranges::for_each(committed_downloads, replace);
1222 }
1223
1224 template <class P>
1225 void BufferCache<P>::NotifyBufferDeletion() {
1226     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1227         dirty_uniform_buffers.fill(~u32{0});
1228     }
1229     auto& flags = maxwell3d.dirty.flags;
1230     flags[Dirty::IndexBuffer] = true;
1231     flags[Dirty::VertexBuffers] = true;
1232     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
1233         flags[Dirty::VertexBuffer0 + index] = true;
1234     }
1235     has_deleted_buffers = true;
1236 }
1237
1238 template <class P>
1239 typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
1240     const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
1241     const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
1242     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1243     if (!cpu_addr || size == 0) {
1244         return NULL_BINDING;
1245     }
1246     // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
1247     // It exists because some games, like Astral Chain, operate out of bounds.
1248     // Binding the whole map range would be technically correct, but games have large maps that
1249     // make this approach unaffordable for now.
1250     static constexpr u32 arbitrary_extra_bytes = 0xc000;
1251     const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
1252     const Binding binding{
1253         .cpu_addr = *cpu_addr,
1254         .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
1255         .buffer_id = BufferId{},
1256     };
1257     return binding;
1258 }
1259
1260 template <class P>
1261 std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
1262     u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
1263     if (IsRangeGranular(cpu_addr, size) ||
1264         base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
1265         return std::span(base_pointer, size);
1266     } else {
1267         const std::span<u8> span = ImmediateBuffer(size);
1268         cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
1269         return span;
1270     }
1271 }
1272
1273 template <class P>
1274 std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
1275     if (wanted_capacity > immediate_buffer_capacity) {
1276         immediate_buffer_capacity = wanted_capacity;
1277         immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
1278     }
1279     return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
1280 }
1281
1282 template <class P>
1283 bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
1284     if constexpr (IS_OPENGL) {
1285         return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
1286     } else {
1287         // Only OpenGL has fast uniform buffers
1288         return false;
1289     }
1290 }
1291
1292 } // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a..000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <memory>
9
10#include "video_core/buffer_cache/map_interval.h"
11
12namespace VideoCommon {
13
14MapIntervalAllocator::MapIntervalAllocator() {
15 FillFreeList(first_chunk);
16}
17
18MapIntervalAllocator::~MapIntervalAllocator() = default;
19
20void MapIntervalAllocator::AllocateNewChunk() {
21 *new_chunk = std::make_unique<Chunk>();
22 FillFreeList(**new_chunk);
23 new_chunk = &(*new_chunk)->next;
24}
25
26void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
27 const std::size_t old_size = free_list.size();
28 free_list.resize(old_size + chunk.data.size());
29 std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
30 [](MapInterval& interval) { return &interval; });
31}
32
33} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index ef974b08a..000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <vector>
11
12#include <boost/intrusive/set_hook.hpp>
13
14#include "common/common_types.h"
15#include "video_core/gpu.h"
16
17namespace VideoCommon {
18
19struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
20 MapInterval() = default;
21
22 /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
23
24 explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
25 : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
26
27 bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
28 return start <= other_start && other_end <= end;
29 }
30
31 bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
32 return start < other_end && other_start < end;
33 }
34
35 void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
36 is_modified = is_modified_;
37 ticks = ticks_;
38 }
39
40 boost::intrusive::set_member_hook<> member_hook_;
41 VAddr start = 0;
42 VAddr end = 0;
43 GPUVAddr gpu_addr = 0;
44 u64 ticks = 0;
45 bool is_written = false;
46 bool is_modified = false;
47 bool is_registered = false;
48 bool is_memory_marked = false;
49 bool is_sync_pending = false;
50};
51
52struct MapIntervalCompare {
53 constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
54 return lhs.start < rhs.start;
55 }
56};
57
58class MapIntervalAllocator {
59public:
60 MapIntervalAllocator();
61 ~MapIntervalAllocator();
62
63 MapInterval* Allocate() {
64 if (free_list.empty()) {
65 AllocateNewChunk();
66 }
67 MapInterval* const interval = free_list.back();
68 free_list.pop_back();
69 return interval;
70 }
71
72 void Release(MapInterval* interval) {
73 free_list.push_back(interval);
74 }
75
76private:
77 struct Chunk {
78 std::unique_ptr<Chunk> next;
79 std::array<MapInterval, 0x8000> data;
80 };
81
82 void AllocateNewChunk();
83
84 void FillFreeList(Chunk& chunk);
85
86 std::vector<MapInterval*> free_list;
87
88 Chunk first_chunk;
89
90 std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
91};
92
93} // namespace VideoCommon
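
The MapIntervalAllocator deleted above amortized heap traffic by carving MapInterval objects out of fixed 0x8000-entry chunks and recycling them through a free list. A generic sketch of the same pattern, with hypothetical names:

    #include <array>
    #include <memory>
    #include <vector>

    // Objects are never returned to the heap individually, only to the free list.
    template <typename T, std::size_t ChunkSize = 0x8000>
    class ChunkPool {
    public:
        T* Allocate() {
            if (free_list.empty()) {
                // Grow by a whole chunk and seed the free list with its slots
                chunks.push_back(std::make_unique<Chunk>());
                for (T& object : chunks.back()->data) {
                    free_list.push_back(&object);
                }
            }
            T* const object = free_list.back();
            free_list.pop_back();
            return object;
        }

        void Release(T* object) {
            free_list.push_back(object); // O(1), no heap traffic
        }

    private:
        struct Chunk {
            std::array<T, ChunkSize> data;
        };
        std::vector<std::unique_ptr<Chunk>> chunks;
        std::vector<T*> free_list;
    };
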
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 55e632346..2b7569335 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -110,12 +110,10 @@ void Vic::Execute() {
110 converted_frame_buffer.get(), block_height, 0, 0); 110 converted_frame_buffer.get(), block_height, 0, 0);
111 111
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); 112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
113 gpu.Maxwell3D().OnMemoryWrite();
114 } else { 113 } else {
115 // send pitch linear frame 114 // send pitch linear frame
116 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, 115 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
117 linear_size); 116 linear_size);
118 gpu.Maxwell3D().OnMemoryWrite();
119 } 117 }
120 break; 118 break;
121 } 119 }
@@ -163,7 +161,6 @@ void Vic::Execute() {
163 } 161 }
164 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), 162 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
165 chroma_buffer.size()); 163 chroma_buffer.size());
166 gpu.Maxwell3D().OnMemoryWrite();
167 break; 164 break;
168 } 165 }
169 default: 166 default:
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index b1eaac00c..7149af290 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -12,13 +12,30 @@
12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) 12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
13 13
14namespace VideoCommon::Dirty { 14namespace VideoCommon::Dirty {
15 15namespace {
16using Tegra::Engines::Maxwell3D; 16using Tegra::Engines::Maxwell3D;
17 17
18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { 18void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
19 static constexpr std::size_t num_array = 3;
20 for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
21 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
22 const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
23
24 FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
25 FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
26 }
27}
28
29void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
30 FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
31}
32
33void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
19 FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); 34 FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
20 FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); 35 FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
36}
21 37
38void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
22 static constexpr std::size_t num_per_rt = NUM(rt[0]); 39 static constexpr std::size_t num_per_rt = NUM(rt[0]);
23 static constexpr std::size_t begin = OFF(rt); 40 static constexpr std::size_t begin = OFF(rt);
24 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; 41 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
41 FillBlock(table, OFF(zeta), NUM(zeta), flag); 58 FillBlock(table, OFF(zeta), NUM(zeta), flag);
42 } 59 }
43} 60}
61} // Anonymous namespace
62
63void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
64 SetupDirtyVertexBuffers(tables);
65 SetupIndexBuffer(tables);
66 SetupDirtyDescriptors(tables);
67 SetupDirtyRenderTargets(tables);
68}
44 69
45} // namespace VideoCommon::Dirty 70} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 875527ddd..702688ace 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -30,6 +30,12 @@ enum : u8 {
30 ColorBuffer7, 30 ColorBuffer7,
31 ZetaBuffer, 31 ZetaBuffer,
32 32
33 VertexBuffers,
34 VertexBuffer0,
35 VertexBuffer31 = VertexBuffer0 + 31,
36
37 IndexBuffer,
38
33 LastCommonEntry, 39 LastCommonEntry,
34}; 40};
35 41
@@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
47 FillBlock(tables[1], begin, num, index_b); 53 FillBlock(tables[1], begin, num, index_b);
48} 54}
49 55
50void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); 56void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
51 57
52} // namespace VideoCommon::Dirty 58} // namespace VideoCommon::Dirty
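
For context on the tables being filled here: each register index maps, through two tables, to up to two dirty flags, so a single register write can raise both a per-resource flag (VertexBuffer0 + i) and an aggregate flag (VertexBuffers). An illustrative model of that dispatch, not the engine's real types:

    #include <array>
    #include <bitset>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t NUM_REGS = 0x1000; // Assumed register-file size for the sketch
    constexpr std::size_t NUM_FLAGS = 64;    // Assumed small flag space

    using Table = std::array<std::uint8_t, NUM_REGS>;
    using Tables = std::array<Table, 2>;

    void OnRegisterWrite(const Tables& tables, std::bitset<NUM_FLAGS>& flags, std::size_t reg) {
        for (const Table& table : tables) {
            if (const std::uint8_t flag = table[reg]; flag != 0) {
                flags.set(flag); // Flag 0 is treated as "no flag", as FillBlock assumes
            }
        }
    }
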
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 2c8b20024..8b33c04ab 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
23 MICROPROFILE_SCOPE(DispatchCalls); 23 MICROPROFILE_SCOPE(DispatchCalls);
24 24
25 gpu.SyncGuestHost(); 25 gpu.SyncGuestHost();
26 // On entering GPU code, assume all memory may be touched by the ARM core.
27 gpu.Maxwell3D().OnMemoryWrite();
28 26
29 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
30 28
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index a01d334ad..0f640fdae 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
18 18
19Fermi2D::~Fermi2D() = default; 19Fermi2D::~Fermi2D() = default;
20 20
21void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 21void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
22 rasterizer = &rasterizer_; 22 rasterizer = rasterizer_;
23} 23}
24 24
25void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 25void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0de3280a2..c808a577d 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
38 ~Fermi2D(); 38 ~Fermi2D();
39 39
40 /// Binds a rasterizer to this engine. 40 /// Binds a rasterizer to this engine.
41 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 41 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
42 42
43 /// Write the value to the register identified by method. 43 /// Write the value to the register identified by method.
44 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; 44 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ba387506e..a9b75091e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
21 21
22KeplerCompute::~KeplerCompute() = default; 22KeplerCompute::~KeplerCompute() = default;
23 23
24void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 24void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
25 rasterizer = &rasterizer_; 25 rasterizer = rasterizer_;
26} 26}
27 27
28void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 28void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
39 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 39 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
40 upload_state.ProcessData(method_argument, is_last_call); 40 upload_state.ProcessData(method_argument, is_last_call);
41 if (is_last_call) { 41 if (is_last_call) {
42 system.GPU().Maxwell3D().OnMemoryWrite();
43 } 42 }
44 break; 43 break;
45 } 44 }
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 9f0a7b76d..7c40cba38 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
46 ~KeplerCompute(); 46 ~KeplerCompute();
47 47
48 /// Binds a rasterizer to this engine. 48 /// Binds a rasterizer to this engine.
49 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 49 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
50 50
51 static constexpr std::size_t NumConstBuffers = 8; 51 static constexpr std::size_t NumConstBuffers = 8;
52 52
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 9911140e9..560551157 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
33 case KEPLERMEMORY_REG_INDEX(data): { 33 case KEPLERMEMORY_REG_INDEX(data): {
34 upload_state.ProcessData(method_argument, is_last_call); 34 upload_state.ProcessData(method_argument, is_last_call);
35 if (is_last_call) { 35 if (is_last_call) {
36 system.GPU().Maxwell3D().OnMemoryWrite();
37 } 36 }
38 break; 37 break;
39 } 38 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 116ad1722..75517a4f7 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
30 30
31Maxwell3D::~Maxwell3D() = default; 31Maxwell3D::~Maxwell3D() = default;
32 32
33void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 33void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
34 rasterizer = &rasterizer_; 34 rasterizer = rasterizer_;
35} 35}
36 36
37void Maxwell3D::InitializeRegisterDefaults() { 37void Maxwell3D::InitializeRegisterDefaults() {
@@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
223 case MAXWELL3D_REG_INDEX(data_upload): 223 case MAXWELL3D_REG_INDEX(data_upload):
224 upload_state.ProcessData(argument, is_last_call); 224 upload_state.ProcessData(argument, is_last_call);
225 if (is_last_call) { 225 if (is_last_call) {
226 OnMemoryWrite();
227 } 226 }
228 return; 227 return;
229 case MAXWELL3D_REG_INDEX(fragment_barrier): 228 case MAXWELL3D_REG_INDEX(fragment_barrier):
@@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
570 } 569 }
571} 570}
572 571
573void Maxwell3D::ProcessCBBind(std::size_t stage_index) { 572void Maxwell3D::ProcessCBBind(size_t stage_index) {
574 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 573 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
575 auto& shader = state.shader_stages[stage_index]; 574 const auto& bind_data = regs.cb_bind[stage_index];
576 auto& bind_data = regs.cb_bind[stage_index]; 575 auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
577
578 ASSERT(bind_data.index < Regs::MaxConstBuffers);
579 auto& buffer = shader.const_buffers[bind_data.index];
580
581 buffer.enabled = bind_data.valid.Value() != 0; 576 buffer.enabled = bind_data.valid.Value() != 0;
582 buffer.address = regs.const_buffer.BufferAddress(); 577 buffer.address = regs.const_buffer.BufferAddress();
583 buffer.size = regs.const_buffer.cb_size; 578 buffer.size = regs.const_buffer.cb_size;
579
580 const bool is_enabled = bind_data.valid.Value() != 0;
581 const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
582 const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
583 rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
584} 584}
585 585
586void Maxwell3D::ProcessCBData(u32 value) { 586void Maxwell3D::ProcessCBData(u32 value) {
@@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
635 635
636 const u32 id = cb_data_state.id; 636 const u32 id = cb_data_state.id;
637 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); 637 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
638 OnMemoryWrite();
639 638
640 cb_data_state.id = null_cb_data; 639 cb_data_state.id = null_cb_data;
641 cb_data_state.current = null_cb_data; 640 cb_data_state.current = null_cb_data;
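
The rewritten ProcessCBBind above forwards every CB_BIND write straight to the rasterizer, encoding a disabled slot as a zero address and size. A sketch of a consumer honoring that convention; the types and names here are hypothetical:

    #include <cstdint>
    #include <optional>

    struct UniformBufferBinding {
        std::uint64_t gpu_addr = 0;
        std::uint32_t size = 0;
    };

    // gpu_addr == 0 and size == 0 together mean "unbind this constant buffer slot".
    void BindGraphicsUniformBuffer(std::optional<UniformBufferBinding>& slot,
                                   std::uint64_t gpu_addr, std::uint32_t size) {
        if (gpu_addr == 0 && size == 0) {
            slot.reset(); // Disabled bind: drop the previous binding
            return;
        }
        slot = UniformBufferBinding{gpu_addr, size};
    }
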
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 002d1b3f9..ffed42a29 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
55 ~Maxwell3D(); 55 ~Maxwell3D();
56 56
57 /// Binds a rasterizer to this engine. 57 /// Binds a rasterizer to this engine.
58 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 58 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
59 59
60 /// Register structure of the Maxwell3D engine. 60 /// Register structure of the Maxwell3D engine.
61 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. 61 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1314,8 +1314,7 @@ public:
1314 1314
1315 GPUVAddr LimitAddress() const { 1315 GPUVAddr LimitAddress() const {
1316 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | 1316 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
1317 limit_low) + 1317 limit_low);
1318 1;
1319 } 1318 }
1320 } vertex_array_limit[NumVertexArrays]; 1319 } vertex_array_limit[NumVertexArrays];
1321 1320
@@ -1403,6 +1402,7 @@ public:
1403 }; 1402 };
1404 1403
1405 std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; 1404 std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
1405
1406 u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. 1406 u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
1407 }; 1407 };
1408 1408
@@ -1452,11 +1452,6 @@ public:
1452 return *rasterizer; 1452 return *rasterizer;
1453 } 1453 }
1454 1454
1455 /// Notify a memory write has happened.
1456 void OnMemoryWrite() {
1457 dirty.flags |= dirty.on_write_stores;
1458 }
1459
1460 enum class MMEDrawMode : u32 { 1455 enum class MMEDrawMode : u32 {
1461 Undefined, 1456 Undefined,
1462 Array, 1457 Array,
@@ -1478,7 +1473,6 @@ public:
1478 using Tables = std::array<Table, 2>; 1473 using Tables = std::array<Table, 2>;
1479 1474
1480 Flags flags; 1475 Flags flags;
1481 Flags on_write_stores;
1482 Tables tables{}; 1476 Tables tables{};
1483 } dirty; 1477 } dirty;
1484 1478
@@ -1541,7 +1535,7 @@ private:
1541 void FinishCBData(); 1535 void FinishCBData();
1542 1536
1543 /// Handles a write to the CB_BIND register. 1537 /// Handles a write to the CB_BIND register.
1544 void ProcessCBBind(std::size_t stage_index); 1538 void ProcessCBBind(size_t stage_index);
1545 1539
1546 /// Handles a write to the VERTEX_END_GL register, triggering a draw. 1540 /// Handles a write to the VERTEX_END_GL register, triggering a draw.
1547 void DrawArrays(); 1541 void DrawArrays();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ba750748c..a2f19559f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
60 return; 60 return;
61 } 61 }
62 62
63 // All copies here update the main memory, so mark all rasterizer states as invalid.
64 system.GPU().Maxwell3D().OnMemoryWrite();
65
66 if (is_src_pitch && is_dst_pitch) { 63 if (is_src_pitch && is_dst_pitch) {
67 CopyPitchToPitch(); 64 CopyPitchToPitch();
68 } else { 65 } else {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 3512283ff..f055b61e9 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -143,22 +143,26 @@ private:
143 } 143 }
144 144
145 bool ShouldWait() const { 145 bool ShouldWait() const {
146 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
146 return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || 147 return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
147 query_cache.ShouldWaitAsyncFlushes(); 148 query_cache.ShouldWaitAsyncFlushes();
148 } 149 }
149 150
150 bool ShouldFlush() const { 151 bool ShouldFlush() const {
152 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
151 return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || 153 return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
152 query_cache.HasUncommittedFlushes(); 154 query_cache.HasUncommittedFlushes();
153 } 155 }
154 156
155 void PopAsyncFlushes() { 157 void PopAsyncFlushes() {
158 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
156 texture_cache.PopAsyncFlushes(); 159 texture_cache.PopAsyncFlushes();
157 buffer_cache.PopAsyncFlushes(); 160 buffer_cache.PopAsyncFlushes();
158 query_cache.PopAsyncFlushes(); 161 query_cache.PopAsyncFlushes();
159 } 162 }
160 163
161 void CommitAsyncFlushes() { 164 void CommitAsyncFlushes() {
165 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
162 texture_cache.CommitAsyncFlushes(); 166 texture_cache.CommitAsyncFlushes();
163 buffer_cache.CommitAsyncFlushes(); 167 buffer_cache.CommitAsyncFlushes();
164 query_cache.CommitAsyncFlushes(); 168 query_cache.CommitAsyncFlushes();
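
The std::scoped_lock lines added here acquire both cache mutexes through std::lock's deadlock-avoidance algorithm, so callers never have to agree on an acquisition order. A condensed sketch of the pattern, with hypothetical types:

    #include <mutex>

    struct Caches {
        std::mutex buffer_mutex;
        std::mutex texture_mutex;
    };

    bool ShouldWait(Caches& caches, bool buffers_pending, bool textures_pending) {
        // Locks both mutexes atomically with respect to deadlock, in any textual order
        std::scoped_lock lock{caches.buffer_mutex, caches.texture_mutex};
        return buffers_pending || textures_pending;
    }
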
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ab06775f..2a9bd4121 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -44,8 +44,8 @@ GPU::~GPU() = default;
44 44
45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
46 renderer = std::move(renderer_); 46 renderer = std::move(renderer_);
47 rasterizer = renderer->ReadRasterizer();
47 48
48 VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
49 memory_manager->BindRasterizer(rasterizer); 49 memory_manager->BindRasterizer(rasterizer);
50 maxwell_3d->BindRasterizer(rasterizer); 50 maxwell_3d->BindRasterizer(rasterizer);
51 fermi_2d->BindRasterizer(rasterizer); 51 fermi_2d->BindRasterizer(rasterizer);
@@ -171,7 +171,7 @@ void GPU::TickWork() {
171 const std::size_t size = request.size; 171 const std::size_t size = request.size;
172 flush_requests.pop_front(); 172 flush_requests.pop_front();
173 flush_request_mutex.unlock(); 173 flush_request_mutex.unlock();
174 renderer->Rasterizer().FlushRegion(addr, size); 174 rasterizer->FlushRegion(addr, size);
175 current_flush_fence.store(fence); 175 current_flush_fence.store(fence);
176 flush_request_mutex.lock(); 176 flush_request_mutex.lock();
177 } 177 }
@@ -193,11 +193,11 @@ u64 GPU::GetTicks() const {
193} 193}
194 194
195void GPU::FlushCommands() { 195void GPU::FlushCommands() {
196 renderer->Rasterizer().FlushCommands(); 196 rasterizer->FlushCommands();
197} 197}
198 198
199void GPU::SyncGuestHost() { 199void GPU::SyncGuestHost() {
200 renderer->Rasterizer().SyncGuestHost(); 200 rasterizer->SyncGuestHost();
201} 201}
202 202
203enum class GpuSemaphoreOperation { 203enum class GpuSemaphoreOperation {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b4ce6b154..b2ee45496 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -366,6 +366,7 @@ protected:
366 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 366 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
367 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; 367 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
368 std::unique_ptr<VideoCore::RendererBase> renderer; 368 std::unique_ptr<VideoCore::RendererBase> renderer;
369 VideoCore::RasterizerInterface* rasterizer = nullptr;
369 const bool use_nvdec; 370 const bool use_nvdec;
370 371
371private: 372private:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 7e490bcc3..50319f1d5 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
38 } 38 }
39 39
40 auto current_context = context.Acquire(); 40 auto current_context = context.Acquire();
41 VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
41 42
42 CommandDataContainer next; 43 CommandDataContainer next;
43 while (state.is_running) { 44 while (state.is_running) {
@@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
52 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { 53 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
53 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 54 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
54 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 55 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
55 renderer.Rasterizer().ReleaseFences(); 56 rasterizer->ReleaseFences();
56 } else if (std::holds_alternative<GPUTickCommand>(next.data)) { 57 } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
57 system.GPU().TickWork(); 58 system.GPU().TickWork();
58 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { 59 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
59 renderer.Rasterizer().FlushRegion(flush->addr, flush->size); 60 rasterizer->FlushRegion(flush->addr, flush->size);
60 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { 61 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
61 renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); 62 rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
62 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 63 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
63 return; 64 return;
64 } else { 65 } else {
@@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() {
84void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 85void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
85 Core::Frontend::GraphicsContext& context, 86 Core::Frontend::GraphicsContext& context,
86 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { 87 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
88 rasterizer = renderer.ReadRasterizer();
87 thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), 89 thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
88 std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); 90 std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
89} 91}
@@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
129} 131}
130 132
131void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 133void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
132 system.Renderer().Rasterizer().OnCPUWrite(addr, size); 134 rasterizer->OnCPUWrite(addr, size);
133} 135}
134 136
135void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 137void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
136 // Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important 138
137 system.Renderer().Rasterizer().OnCPUWrite(addr, size); 139 rasterizer->OnCPUWrite(addr, size);
138} 140}
139 141
140void ThreadManager::WaitIdle() const { 142void ThreadManager::WaitIdle() const {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2775629e7..4cd951169 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -27,6 +27,7 @@ class System;
27} // namespace Core 27} // namespace Core
28 28
29namespace VideoCore { 29namespace VideoCore {
30class RasterizerInterface;
30class RendererBase; 31class RendererBase;
31} // namespace VideoCore 32} // namespace VideoCore
32 33
@@ -151,11 +152,12 @@ private:
151 /// Pushes a command to be executed by the GPU thread 152 /// Pushes a command to be executed by the GPU thread
152 u64 PushCommand(CommandData&& command_data); 153 u64 PushCommand(CommandData&& command_data);
153 154
154 SynchState state;
155 Core::System& system; 155 Core::System& system;
156 std::thread thread;
157 std::thread::id thread_id;
158 const bool is_async; 156 const bool is_async;
157 VideoCore::RasterizerInterface* rasterizer = nullptr;
158
159 SynchState state;
160 std::thread thread;
159}; 161};
160 162
161} // namespace VideoCommon::GPUThread 163} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 28f2b8614..970120acc 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -12,7 +12,6 @@ set(SHADER_FILES
12 vulkan_blit_depth_stencil.frag 12 vulkan_blit_depth_stencil.frag
13 vulkan_present.frag 13 vulkan_present.frag
14 vulkan_present.vert 14 vulkan_present.vert
15 vulkan_quad_array.comp
16 vulkan_quad_indexed.comp 15 vulkan_quad_indexed.comp
17 vulkan_uint8.comp 16 vulkan_uint8.comp
18) 17)
diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp
deleted file mode 100644
index 212f4e998..000000000
--- a/src/video_core/host_shaders/vulkan_quad_array.comp
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 460 core
6
7layout (local_size_x = 1024) in;
8
9layout (std430, set = 0, binding = 0) buffer OutputBuffer {
10 uint output_indexes[];
11};
12
13layout (push_constant) uniform PushConstants {
14 uint first;
15};
16
17void main() {
18 uint primitive = gl_GlobalInvocationID.x;
19 if (primitive * 6 >= output_indexes.length()) {
20 return;
21 }
22
23 const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
24 for (uint vertex = 0; vertex < 6; ++vertex) {
25 uint index = first + primitive * 4 + quad_map[vertex];
26 output_indexes[primitive * 6 + vertex] = index;
27 }
28}
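
The deleted compute shader expanded each quad (4 vertices) into two triangles (6 indices) using the 0,1,2,0,2,3 map above. The same index transform on the CPU looks roughly like this; it is a sketch for reference, not the replacement path yuzu actually uses:

    #include <cstdint>
    #include <vector>

    std::vector<std::uint32_t> ExpandQuads(std::uint32_t first, std::uint32_t num_quads) {
        static constexpr std::uint32_t quad_map[6]{0, 1, 2, 0, 2, 3};
        std::vector<std::uint32_t> indices;
        indices.reserve(num_quads * 6);
        for (std::uint32_t primitive = 0; primitive < num_quads; ++primitive) {
            for (const std::uint32_t vertex : quad_map) {
                // Each quad contributes 4 vertices; emit them as two triangles
                indices.push_back(first + primitive * 4 + vertex);
            }
        }
        return indices;
    }
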
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index ad74d7af9..872291670 100644
--- a/src/video_core/host_shaders/vulkan_uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
16 uint16_t output_indexes[]; 16 uint16_t output_indexes[];
17}; 17};
18 18
19uint AssembleIndex(uint id) {
20 // Most primitive restart indices are 0xFF
21 // Hardcode this to 0xFF for now
22 uint index = uint(input_indexes[id]);
23 return index == 0xFF ? 0xFFFF : index;
24}
25
19void main() { 26void main() {
20 uint id = gl_GlobalInvocationID.x; 27 uint id = gl_GlobalInvocationID.x;
21 if (id < input_indexes.length()) { 28 if (id < input_indexes.length()) {
22 output_indexes[id] = uint16_t(input_indexes[id]); 29 output_indexes[id] = uint16_t(AssembleIndex(id));
23 } 30 }
24} 31}
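
For reference, a CPU-side equivalent of the AssembleIndex change above: widen 8-bit indices to 16 bits while promoting the assumed 0xFF primitive-restart sentinel to 0xFFFF.

    #include <cstdint>
    #include <vector>

    std::vector<std::uint16_t> WidenIndices(const std::vector<std::uint8_t>& input) {
        std::vector<std::uint16_t> output;
        output.reserve(input.size());
        for (const std::uint8_t index : input) {
            // 0xFF is hardcoded as the restart index, matching the shader's comment
            output.push_back(index == 0xFF ? std::uint16_t{0xFFFF} : std::uint16_t{index});
        }
        return output;
    }
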
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index c841f3cd7..44240a9c4 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)
21 21
22MemoryManager::~MemoryManager() = default; 22MemoryManager::~MemoryManager() = default;
23 23
24void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { 24void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
25 rasterizer = &rasterizer_; 25 rasterizer = rasterizer_;
26} 26}
27 27
28GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { 28GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b468a67de..b3538d503 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -72,7 +72,7 @@ public:
72 ~MemoryManager(); 72 ~MemoryManager();
73 73
74 /// Binds a renderer to the memory manager. 74 /// Binds a renderer to the memory manager.
75 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 75 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
76 76
77 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; 77 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
78 78
@@ -157,6 +157,8 @@ private:
157 157
158 using MapRange = std::pair<GPUVAddr, size_t>; 158 using MapRange = std::pair<GPUVAddr, size_t>;
159 std::vector<MapRange> map_ranges; 159 std::vector<MapRange> map_ranges;
160
161 std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
160}; 162};
161 163
162} // namespace Tegra 164} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0cb0f387d..50491b758 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -7,6 +7,7 @@
7#include <atomic> 7#include <atomic>
8#include <functional> 8#include <functional>
9#include <optional> 9#include <optional>
10#include <span>
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "video_core/engines/fermi_2d.h" 12#include "video_core/engines/fermi_2d.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
@@ -49,6 +50,10 @@ public:
49 /// Records a GPU query and caches it 50 /// Records a GPU query and caches it
50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; 51 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
51 52
 53 /// Signal a uniform buffer binding
54 virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
55 u32 size) = 0;
56
52 /// Signal a GPU based semaphore as a fence 57 /// Signal a GPU based semaphore as a fence
53 virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; 58 virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
54 59
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 51dde8eb5..320ee8d30 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -37,15 +37,11 @@ public:
37 std::unique_ptr<Core::Frontend::GraphicsContext> context); 37 std::unique_ptr<Core::Frontend::GraphicsContext> context);
38 virtual ~RendererBase(); 38 virtual ~RendererBase();
39 39
40 /// Initialize the renderer
41 [[nodiscard]] virtual bool Init() = 0;
42
43 /// Shutdown the renderer
44 virtual void ShutDown() = 0;
45
46 /// Finalize rendering the guest frame and draw into the presentation texture 40 /// Finalize rendering the guest frame and draw into the presentation texture
47 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 41 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
48 42
43 [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
44
49 // Getter/setter functions: 45 // Getter/setter functions:
50 // ------------------------ 46 // ------------------------
51 47
@@ -57,14 +53,6 @@ public:
57 return m_current_frame; 53 return m_current_frame;
58 } 54 }
59 55
60 [[nodiscard]] RasterizerInterface& Rasterizer() {
61 return *rasterizer;
62 }
63
64 [[nodiscard]] const RasterizerInterface& Rasterizer() const {
65 return *rasterizer;
66 }
67
68 [[nodiscard]] Core::Frontend::GraphicsContext& Context() { 56 [[nodiscard]] Core::Frontend::GraphicsContext& Context() {
69 return *context; 57 return *context;
70 } 58 }
@@ -98,7 +86,6 @@ public:
98 86
99protected: 87protected:
100 Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. 88 Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
101 std::unique_ptr<RasterizerInterface> rasterizer;
102 std::unique_ptr<Core::Frontend::GraphicsContext> context; 89 std::unique_ptr<Core::Frontend::GraphicsContext> context;
103 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer 90 f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
104 int m_current_frame = 0; ///< Current frame, should be set by the renderer 91 int m_current_frame = 0; ///< Current frame, should be set by the renderer
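
After this change, RendererBase no longer owns the rasterizer; each concrete renderer stores it and exposes a non-owning pointer through the new pure-virtual ReadRasterizer. A minimal sketch of the resulting ownership shape, with hypothetical concrete types:

    class RasterizerInterface {
    public:
        virtual ~RasterizerInterface() = default;
    };

    class RendererBase {
    public:
        virtual ~RendererBase() = default;
        // Non-owning; the concrete renderer keeps the rasterizer alive
        virtual RasterizerInterface* ReadRasterizer() = 0;
    };

    class RendererOpenGL final : public RendererBase {
    public:
        RasterizerInterface* ReadRasterizer() override {
            return &rasterizer;
        }

    private:
        // Concrete member instead of a std::unique_ptr in the base class
        struct Rasterizer final : RasterizerInterface {
        } rasterizer;
    };
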
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 5772cad87..6da3906a4 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4
5 #include <memory>
6
7 #include <glad/glad.h>
8
9 #include "common/assert.h"
10 #include "common/microprofile.h"
11 #include "video_core/buffer_cache/buffer_cache.h"
12 #include "video_core/engines/maxwell_3d.h"
13 #include "video_core/rasterizer_interface.h"
14 #include "video_core/renderer_opengl/gl_buffer_cache.h"
15 #include "video_core/renderer_opengl/gl_device.h"
16 #include "video_core/renderer_opengl/gl_rasterizer.h"
17 #include "video_core/renderer_opengl/gl_resource_manager.h"
18
19 namespace OpenGL {
20
21 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22
23 MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
24
25 Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
26     : BufferBlock{cpu_addr_, size_} {
27     gl_buffer.Create();
28     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
29     if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
30         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31         glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32     }
33 }
34
35 Buffer::~Buffer() = default;
36
37 void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
38     glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
39                          static_cast<GLsizeiptr>(data_size), data);
40 }
41
42 void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
43     MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
44     const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
45     const GLintptr gl_offset = static_cast<GLintptr>(offset);
46     if (read_buffer.handle == 0) {
47         read_buffer.Create();
48         glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
49                           GL_STREAM_READ);
50     }
51     glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
52     glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
53     glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
54 }
55
56 void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,

2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4
5 #include <span>
6
7 #include "video_core/buffer_cache/buffer_cache.h"
8 #include "video_core/renderer_opengl/gl_buffer_cache.h"
9 #include "video_core/renderer_opengl/gl_device.h"
10
11 namespace OpenGL {
12 namespace {
13 struct BindlessSSBO {
14     GLuint64EXT address;
15     GLsizei length;
16     GLsizei padding;
17 };
18 static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);
19
20 constexpr std::array PROGRAM_LUT{
21     GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
22     GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
23 };
24 } // Anonymous namespace
25
26 Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
27     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
28
29 Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
30                VAddr cpu_addr_, u64 size_bytes_)
31     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
32     buffer.Create();
33     const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
34     glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
35     glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
36
37     if (runtime.has_unified_vertex_buffers) {
38         glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
39     }
40 }
41
42 void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
43     glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
44                          static_cast<GLsizeiptr>(data.size_bytes()), data.data());
45 }
46
47 void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
48     glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
49                             static_cast<GLsizeiptr>(data.size_bytes()), data.data());
50 }
51
52 void Buffer::MakeResident(GLenum access) noexcept {
53     // Abuse GLenum's order to exit early
54     // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
55     if (access <= current_residency_access || buffer.handle == 0) {
56         return;
57     }
58     if (std::exchange(current_residency_access, access) != GL_NONE) {
59         // If the buffer is already resident, remove its residency before promoting it
60         glMakeNamedBufferNonResidentNV(buffer.handle);
61     }
62     glMakeNamedBufferResidentNV(buffer.handle, access);
63 }
64
65 BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
66     : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
67       use_assembly_shaders{device.UseAssemblyShaders()},
68       has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
69       stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
70     GLint gl_max_attributes;
71     glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
72     max_attributes = static_cast<u32>(gl_max_attributes);
73     for (auto& stage_uniforms : fast_uniforms) {
74         for (OGLBuffer& buffer : stage_uniforms) {
75             buffer.Create();
76             glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
77         }
78     }
79     for (auto& stage_uniforms : copy_uniforms) {
80         for (OGLBuffer& buffer : stage_uniforms) {
81             buffer.Create();
82             glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
83         }
84     }
85     for (OGLBuffer& buffer : copy_compute_uniforms) {
86         buffer.Create();
87         glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
88     }
89 }
90
91 void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
92                                     std::span<const VideoCommon::BufferCopy> copies) {
93     for (const VideoCommon::BufferCopy& copy : copies) {
94         glCopyNamedBufferSubData(
95             src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
96             static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
97     }
98 }
99
100 void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
56void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 100void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
57 std::size_t copy_size) { 101 if (has_unified_vertex_buffers) {
58 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), 102 buffer.MakeResident(GL_READ_ONLY);
59 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); 103 glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
104 static_cast<GLsizeiptr>(size));
105 } else {
106 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
107 index_buffer_offset = offset;
108 }
60} 109}
61 110
62OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, 111void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
63 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 112 u32 stride) {
64 const Device& device_, OGLStreamBuffer& stream_buffer_, 113 if (index >= max_attributes) {
65 StateTracker& state_tracker)
66 : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
67 if (!device.HasFastBufferSubData()) {
68 return; 114 return;
69 } 115 }
70 116 if (has_unified_vertex_buffers) {
71 static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); 117 buffer.MakeResident(GL_READ_ONLY);
72 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 118 glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
73 for (const GLuint cbuf : cbufs) { 119 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
74 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); 120 buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
121 } else {
122 glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
123 static_cast<GLsizei>(stride));
75 } 124 }
76} 125}
77 126
78OGLBufferCache::~OGLBufferCache() { 127void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
79 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 128 u32 offset, u32 size) {
129 if (use_assembly_shaders) {
130 GLuint handle;
131 if (offset != 0) {
132 handle = copy_uniforms[stage][binding_index].handle;
133 glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
134 } else {
135 handle = buffer.Handle();
136 }
137 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
138 static_cast<GLsizeiptr>(size));
139 } else {
140 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
141 const GLuint binding = base_binding + binding_index;
142 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
143 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
144 }
80} 145}
81 146
82std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 147void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset,
83 return std::make_shared<Buffer>(device, cpu_addr, size); 148 u32 size) {
149 if (use_assembly_shaders) {
150 GLuint handle;
151 if (offset != 0) {
152 handle = copy_compute_uniforms[binding_index].handle;
153 glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
154 } else {
155 handle = buffer.Handle();
156 }
157 glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0,
158 static_cast<GLsizeiptr>(size));
159 } else {
160 glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(),
161 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
162 }
84} 163}
85 164
86OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { 165void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
87 return {0, 0, 0}; 166 u32 offset, u32 size, bool is_written) {
167 if (use_assembly_shaders) {
168 const BindlessSSBO ssbo{
169 .address = buffer.HostGpuAddr() + offset,
170 .length = static_cast<GLsizei>(size),
171 .padding = 0,
172 };
173 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
174 glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
175 reinterpret_cast<const GLuint*>(&ssbo));
176 } else {
177 const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
178 const GLuint binding = base_binding + binding_index;
179 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
180 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
181 }
88} 182}
89 183
90OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, 184void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
91 std::size_t size) { 185 u32 size, bool is_written) {
92 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); 186 if (use_assembly_shaders) {
93 const GLuint cbuf = cbufs[cbuf_cursor++]; 187 const BindlessSSBO ssbo{
188 .address = buffer.HostGpuAddr() + offset,
189 .length = static_cast<GLsizei>(size),
190 .padding = 0,
191 };
192 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
193 glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
194 reinterpret_cast<const GLuint*>(&ssbo));
195 } else if (size == 0) {
196 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
197 } else {
198 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
199 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
200 }
201}
94 202
95 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); 203void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset,
96 return {cbuf, 0, 0}; 204 u32 size) {
205 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(),
206 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
97} 207}
98 208
99} // namespace OpenGL 209} // namespace OpenGL
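
The residency bookkeeping in Buffer::MakeResident above leans on the numeric ordering of the access enums (GL_NONE < GL_READ_ONLY < GL_READ_WRITE), so a single comparison both filters redundant calls and detects promotions that require re-residency. A minimal sketch of the idea, with the standard GL enum values inlined as constants and the driver calls stubbed out in comments; everything here is illustrative, not part of the commit:

#include <cstdint>
#include <utility>

using GLenum = std::uint32_t;
constexpr GLenum GL_NONE = 0;             // default state: buffer not resident
constexpr GLenum GL_READ_ONLY = 0x88B8;   // standard OpenGL enum values
constexpr GLenum GL_READ_WRITE = 0x88BA;
static_assert(GL_NONE < GL_READ_ONLY && GL_READ_ONLY < GL_READ_WRITE);

struct ResidencySketch {
    GLenum current_access = GL_NONE;

    void MakeResident(GLenum access) {
        if (access <= current_access) {
            return; // already resident with equal or stronger access
        }
        if (std::exchange(current_access, access) != GL_NONE) {
            // demote first, mirroring glMakeNamedBufferNonResidentNV
        }
        // promote, mirroring glMakeNamedBufferResidentNV(handle, access)
    }
};

Requesting GL_READ_ONLY twice is a no-op on the second call, while a later GL_READ_WRITE request correctly drops the read-only residency before promoting.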
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 17ee90316..d8b20a9af 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,79 +5,157 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <span>
9 9
10#include "common/alignment.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/dynamic_library.h"
11#include "video_core/buffer_cache/buffer_cache.h" 13#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h" 14#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 17#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 18
16namespace Core {
17class System;
18}
19
20namespace OpenGL { 19namespace OpenGL {
21 20
22class Device; 21class BufferCacheRuntime;
23class OGLStreamBuffer;
24class RasterizerOpenGL;
25class StateTracker;
26 22
27class Buffer : public VideoCommon::BufferBlock { 23class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
28public: 24public:
29 explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); 25 explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr,
30 ~Buffer(); 26 u64 size_bytes);
27 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
31 28
32 void Upload(std::size_t offset, std::size_t data_size, const u8* data); 29 void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
33 30
34 void Download(std::size_t offset, std::size_t data_size, u8* data); 31 void ImmediateDownload(size_t offset, std::span<u8> data) noexcept;
35 32
36 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 33 void MakeResident(GLenum access) noexcept;
37 std::size_t copy_size);
38 34
39 GLuint Handle() const noexcept { 35 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
40 return gl_buffer.handle; 36 return address;
41 } 37 }
42 38
43 u64 Address() const noexcept { 39 [[nodiscard]] GLuint Handle() const noexcept {
44 return gpu_address; 40 return buffer.handle;
45 } 41 }
46 42
47private: 43private:
48 OGLBuffer gl_buffer; 44 GLuint64EXT address = 0;
49 OGLBuffer read_buffer; 45 OGLBuffer buffer;
50 u64 gpu_address = 0; 46 GLenum current_residency_access = GL_NONE;
51}; 47};
52 48
53using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; 49class BufferCacheRuntime {
54class OGLBufferCache final : public GenericBufferCache { 50 friend Buffer;
51
55public: 52public:
56 explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, 53 static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
57 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, 54
58 const Device& device, OGLStreamBuffer& stream_buffer, 55 explicit BufferCacheRuntime(const Device& device_);
59 StateTracker& state_tracker); 56
60 ~OGLBufferCache(); 57 void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
58 std::span<const VideoCommon::BufferCopy> copies);
59
60 void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
61
62 void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
63
64 void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
65
66 void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
67
68 void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
69 bool is_written);
70
71 void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
72 bool is_written);
73
74 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
75
76 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
77 if (use_assembly_shaders) {
78 const GLuint handle = fast_uniforms[stage][binding_index].handle;
79 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
80 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
81 } else {
82 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
83 const GLuint binding = base_binding + binding_index;
84 glBindBufferRange(GL_UNIFORM_BUFFER, binding,
85 fast_uniforms[stage][binding_index].handle, 0,
86 static_cast<GLsizeiptr>(size));
87 }
88 }
61 89
62 BufferInfo GetEmptyBuffer(std::size_t) override; 90 void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) {
91 if (use_assembly_shaders) {
92 glProgramBufferParametersIuivNV(
93 PABO_LUT[stage], binding_index, 0,
94 static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)),
95 reinterpret_cast<const GLuint*>(data.data()));
96 } else {
97 glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0,
98 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
99 }
100 }
63 101
64 void Acquire() noexcept { 102 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
65 cbuf_cursor = 0; 103 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
104 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
105 const GLuint binding = base_binding + binding_index;
106 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
107 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
108 return mapped_span;
66 } 109 }
67 110
68protected: 111 [[nodiscard]] const GLvoid* IndexOffset() const noexcept {
69 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 112 return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
113 }
70 114
71 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; 115 [[nodiscard]] bool HasFastBufferSubData() const noexcept {
116 return has_fast_buffer_sub_data;
117 }
72 118
73private: 119private:
74 static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * 120 static constexpr std::array PABO_LUT{
75 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; 121 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
122 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
123 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
124 };
76 125
77 const Device& device; 126 const Device& device;
78 127
79 std::size_t cbuf_cursor = 0; 128 bool has_fast_buffer_sub_data = false;
80 std::array<GLuint, NUM_CBUFS> cbufs{}; 129 bool use_assembly_shaders = false;
130 bool has_unified_vertex_buffers = false;
131
132 u32 max_attributes = 0;
133
134 std::optional<StreamBuffer> stream_buffer;
135
136 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
137 VideoCommon::NUM_STAGES>
138 fast_uniforms;
139 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
140 VideoCommon::NUM_STAGES>
141 copy_uniforms;
142 std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms;
143
144 u32 index_buffer_offset = 0;
145};
146
147struct BufferCacheParams {
148 using Runtime = OpenGL::BufferCacheRuntime;
149 using Buffer = OpenGL::Buffer;
150
151 static constexpr bool IS_OPENGL = true;
152 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
153 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
154 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
155 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
156 static constexpr bool USE_MEMORY_MAPS = false;
81}; 157};
82 158
159using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
160
83} // namespace OpenGL 161} // namespace OpenGL
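
BufferCacheParams acts as a compile-time policy: the templated VideoCommon::BufferCache<P> can specialize its code paths on these constants without any virtual dispatch or runtime branching. A hedged sketch of how such a trait is typically consumed (TinySketchCache and its method are assumptions for illustration, not the real VideoCommon implementation):

template <class P>
class TinySketchCache {
public:
    void UploadRegion() {
        if constexpr (P::USE_MEMORY_MAPS) {
            // map a staging allocation and schedule a copy (Vulkan-style backend)
        } else {
            // immediate upload path, as this runtime's ImmediateUpload takes
        }
    }
};

// Instantiating TinySketchCache<OpenGL::BufferCacheParams> compiles only the
// immediate-upload branch, since USE_MEMORY_MAPS is false for this backend.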
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 04c267ee4..48d5c4a5e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,7 @@
21#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
22 22
23namespace OpenGL { 23namespace OpenGL {
24
25namespace { 24namespace {
26
27// One uniform block is reserved for emulation purposes 25// One uniform block is reserved for emulation purposes
28constexpr u32 ReservedUniformBlocks = 1; 26constexpr u32 ReservedUniformBlocks = 1;
29 27
@@ -197,11 +195,13 @@ bool IsASTCSupported() {
197 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 195 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
198 return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); 196 return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
199} 197}
200
201} // Anonymous namespace 198} // Anonymous namespace
202 199
203Device::Device() 200Device::Device() {
204 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { 201 if (!GLAD_GL_VERSION_4_6) {
202 LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
203 throw std::runtime_error{"Insufficient version"};
204 }
205 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 205 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
206 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); 206 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
207 const std::vector extensions = GetExtensions(); 207 const std::vector extensions = GetExtensions();
@@ -217,6 +217,9 @@ Device::Device()
217 "Beta driver 443.24 is known to have issues. There might be performance issues."); 217 "Beta driver 443.24 is known to have issues. There might be performance issues.");
218 disable_fast_buffer_sub_data = true; 218 disable_fast_buffer_sub_data = true;
219 } 219 }
220
221 max_uniform_buffers = BuildMaxUniformBuffers();
222 base_bindings = BuildBaseBindings();
220 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 223 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
221 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 224 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
222 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 225 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
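
Two related changes above: the constructor now fails fast with an exception when OpenGL 4.6 is unavailable, and max_uniform_buffers/base_bindings move from the member-initializer list into the constructor body so they are computed only after the vendor and driver workarounds have been detected. A compressed sketch of that ordering concern; the helper names are placeholders, not the real Device API:

#include <stdexcept>

struct DeviceSketch {
    DeviceSketch() {
        if (!HasRequiredGLVersion()) {          // stand-in for GLAD_GL_VERSION_4_6
            throw std::runtime_error{"Insufficient version"};
        }
        has_driver_bug = DetectDriverQuirks();  // must run first...
        limits = BuildLimits(has_driver_bug);   // ...so limits can honor the quirks
    }
    bool HasRequiredGLVersion() const { return true; }  // placeholder probe
    bool DetectDriverQuirks() const { return false; }   // placeholder probe
    int BuildLimits(bool buggy) const { return buggy ? 8 : 16; }

    bool has_driver_bug = false;
    int limits = 0;
};

Member-initializer lists always run before the constructor body, so keeping the dependent members there would have baked in limits computed before the quirk detection.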
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9141de635..ee053776d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@
10 10
11namespace OpenGL { 11namespace OpenGL {
12 12
13static constexpr u32 EmulationUniformBlockBinding = 0; 13class Device {
14
15class Device final {
16public: 14public:
17 struct BaseBindings final { 15 struct BaseBindings {
18 u32 uniform_buffer{}; 16 u32 uniform_buffer{};
19 u32 shader_storage_buffer{}; 17 u32 shader_storage_buffer{};
20 u32 sampler{}; 18 u32 sampler{};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 3e9c922f5..151290101 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {
47 47
48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, 48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
49 Tegra::GPU& gpu_, TextureCache& texture_cache_, 49 Tegra::GPU& gpu_, TextureCache& texture_cache_,
50 OGLBufferCache& buffer_cache_, QueryCache& query_cache_) 50 BufferCache& buffer_cache_, QueryCache& query_cache_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
52 52
53Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { 53Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 30dbee613..e714aa115 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -32,14 +32,13 @@ private:
32}; 32};
33 33
34using Fence = std::shared_ptr<GLInnerFence>; 34using Fence = std::shared_ptr<GLInnerFence>;
35using GenericFenceManager = 35using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
36 VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
37 36
38class FenceManagerOpenGL final : public GenericFenceManager { 37class FenceManagerOpenGL final : public GenericFenceManager {
39public: 38public:
40 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 39 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
41 TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, 40 TextureCache& texture_cache, BufferCache& buffer_cache,
42 QueryCache& query_cache_); 41 QueryCache& query_cache);
43 42
44protected: 43protected:
45 Fence CreateFence(u32 value, bool is_stubbed) override; 44 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8aa63d329..418644108 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -38,37 +38,21 @@
38namespace OpenGL { 38namespace OpenGL {
39 39
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 40using Maxwell = Tegra::Engines::Maxwell3D::Regs;
41using GLvec4 = std::array<GLfloat, 4>;
41 42
42using Tegra::Engines::ShaderType; 43using Tegra::Engines::ShaderType;
43using VideoCore::Surface::PixelFormat; 44using VideoCore::Surface::PixelFormat;
44using VideoCore::Surface::SurfaceTarget; 45using VideoCore::Surface::SurfaceTarget;
45using VideoCore::Surface::SurfaceType; 46using VideoCore::Surface::SurfaceType;
46 47
47MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
48MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
49MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
50MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
51MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
52MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
53MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
54MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); 48MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
49MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
55MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); 50MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
56MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); 51MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
57MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
58 52
59namespace { 53namespace {
60 54
61constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
62constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
63 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
64constexpr size_t TOTAL_CONST_BUFFER_BYTES =
65 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
66
67constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 55constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
68constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
69
70constexpr size_t MAX_TEXTURES = 192;
71constexpr size_t MAX_IMAGES = 48;
72 56
73struct TextureHandle { 57struct TextureHandle {
74 constexpr TextureHandle(u32 data, bool via_header_index) { 58 constexpr TextureHandle(u32 data, bool via_header_index) {
@@ -104,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
104 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); 88 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
105} 89}
106 90
107std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
108 const ConstBufferEntry& entry) {
109 if (!entry.IsIndirect()) {
110 return entry.GetSize();
111 }
112 if (buffer.size > Maxwell::MaxConstBufferSize) {
113 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
114 Maxwell::MaxConstBufferSize);
115 return Maxwell::MaxConstBufferSize;
116 }
117
118 return buffer.size;
119}
120
121/// Translates hardware transform feedback indices 91/// Translates hardware transform feedback indices
122/// @param location Hardware location 92/// @param location Hardware location
123/// @return Pair of ARB_transform_feedback3 token stream first and third arguments 93/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -150,14 +120,6 @@ void oglEnable(GLenum cap, bool state) {
150 (state ? glEnable : glDisable)(cap); 120 (state ? glEnable : glDisable)(cap);
151} 121}
152 122
153void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
154 if (num_ssbos == 0) {
155 return;
156 }
157 glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
158 reinterpret_cast<const GLuint*>(ssbos));
159}
160
161ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { 123ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
162 if (entry.is_buffer) { 124 if (entry.is_buffer) {
163 return ImageViewType::Buffer; 125 return ImageViewType::Buffer;
@@ -204,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
204 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), 166 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
205 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 167 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
206 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), 168 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
207 stream_buffer(device, state_tracker),
208 texture_cache_runtime(device, program_manager, state_tracker), 169 texture_cache_runtime(device, program_manager, state_tracker),
209 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 170 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
171 buffer_cache_runtime(device),
172 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
210 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 173 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
211 query_cache(*this, maxwell3d, gpu_memory), 174 query_cache(*this, maxwell3d, gpu_memory),
212 buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
213 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 175 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
214 async_shaders(emu_window_) { 176 async_shaders(emu_window_) {
215 unified_uniform_buffer.Create();
216 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
217
218 if (device.UseAssemblyShaders()) {
219 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
220 for (const GLuint cbuf : staging_cbufs) {
221 glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
222 nullptr, 0);
223 }
224 }
225 if (device.UseAsynchronousShaders()) { 177 if (device.UseAsynchronousShaders()) {
226 async_shaders.AllocateWorkers(); 178 async_shaders.AllocateWorkers();
227 } 179 }
228} 180}
229 181
230RasterizerOpenGL::~RasterizerOpenGL() { 182RasterizerOpenGL::~RasterizerOpenGL() = default;
231 if (device.UseAssemblyShaders()) {
232 glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
233 }
234}
235 183
236void RasterizerOpenGL::SetupVertexFormat() { 184void RasterizerOpenGL::SyncVertexFormats() {
237 auto& flags = maxwell3d.dirty.flags; 185 auto& flags = maxwell3d.dirty.flags;
238 if (!flags[Dirty::VertexFormats]) { 186 if (!flags[Dirty::VertexFormats]) {
239 return; 187 return;
240 } 188 }
241 flags[Dirty::VertexFormats] = false; 189 flags[Dirty::VertexFormats] = false;
242 190
243 MICROPROFILE_SCOPE(OpenGL_VAO);
244
245 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables 191 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
246 // the first 16 vertex attributes always, as we don't know which ones are actually used until 192 // the first 16 vertex attributes always, as we don't know which ones are actually used until
247 // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to 193 // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -277,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
277 } 223 }
278} 224}
279 225
280void RasterizerOpenGL::SetupVertexBuffer() { 226void RasterizerOpenGL::SyncVertexInstances() {
281 auto& flags = maxwell3d.dirty.flags;
282 if (!flags[Dirty::VertexBuffers]) {
283 return;
284 }
285 flags[Dirty::VertexBuffers] = false;
286
287 MICROPROFILE_SCOPE(OpenGL_VB);
288
289 const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
290
291 // Upload all guest vertex arrays sequentially to our buffer
292 const auto& regs = maxwell3d.regs;
293 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
294 if (!flags[Dirty::VertexBuffer0 + index]) {
295 continue;
296 }
297 flags[Dirty::VertexBuffer0 + index] = false;
298
299 const auto& vertex_array = regs.vertex_array[index];
300 if (!vertex_array.IsEnabled()) {
301 continue;
302 }
303
304 const GPUVAddr start = vertex_array.StartAddress();
305 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
306 ASSERT(end >= start);
307
308 const GLuint gl_index = static_cast<GLuint>(index);
309 const u64 size = end - start;
310 if (size == 0) {
311 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
312 if (use_unified_memory) {
313 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
314 }
315 continue;
316 }
317 const auto info = buffer_cache.UploadMemory(start, size);
318 if (use_unified_memory) {
319 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
320 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
321 info.address + info.offset, size);
322 } else {
323 glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
324 }
325 }
326}
327
328void RasterizerOpenGL::SetupVertexInstances() {
329 auto& flags = maxwell3d.dirty.flags; 227 auto& flags = maxwell3d.dirty.flags;
330 if (!flags[Dirty::VertexInstances]) { 228 if (!flags[Dirty::VertexInstances]) {
331 return; 229 return;
@@ -346,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
346 } 244 }
347} 245}
348 246
349GLintptr RasterizerOpenGL::SetupIndexBuffer() { 247void RasterizerOpenGL::SetupShaders(bool is_indexed) {
350 MICROPROFILE_SCOPE(OpenGL_Index);
351 const auto& regs = maxwell3d.regs;
352 const std::size_t size = CalculateIndexBufferSize();
353 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
354 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
355 return info.offset;
356}
357
358void RasterizerOpenGL::SetupShaders() {
359 MICROPROFILE_SCOPE(OpenGL_Shader);
360 u32 clip_distances = 0; 248 u32 clip_distances = 0;
361 249
362 std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; 250 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -413,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() {
413 const size_t stage = index == 0 ? 0 : index - 1; 301 const size_t stage = index == 0 ? 0 : index - 1;
414 shaders[stage] = shader; 302 shaders[stage] = shader;
415 303
416 SetupDrawConstBuffers(stage, shader);
417 SetupDrawGlobalMemory(stage, shader);
418 SetupDrawTextures(shader, stage); 304 SetupDrawTextures(shader, stage);
419 SetupDrawImages(shader, stage); 305 SetupDrawImages(shader, stage);
420 306
307 buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
308
309 buffer_cache.UnbindGraphicsStorageBuffers(stage);
310 u32 ssbo_index = 0;
311 for (const auto& buffer : shader->GetEntries().global_memory_entries) {
312 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
313 buffer.cbuf_offset, buffer.is_written);
314 ++ssbo_index;
315 }
316
421 // Workaround for Intel drivers. 317 // Workaround for Intel drivers.
422 // When a clip distance is enabled but not set in the shader it crops parts of the screen 318 // When a clip distance is enabled but not set in the shader it crops parts of the screen
423 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 319 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -433,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() {
433 SyncClipEnabled(clip_distances); 329 SyncClipEnabled(clip_distances);
434 maxwell3d.dirty.flags[Dirty::Shaders] = false; 330 maxwell3d.dirty.flags[Dirty::Shaders] = false;
435 331
332 buffer_cache.UpdateGraphicsBuffers(is_indexed);
333
436 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 334 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
437 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); 335 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
438 336
337 buffer_cache.BindHostGeometryBuffers(is_indexed);
338
439 size_t image_view_index = 0; 339 size_t image_view_index = 0;
440 size_t texture_index = 0; 340 size_t texture_index = 0;
441 size_t image_index = 0; 341 size_t image_index = 0;
442 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 342 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
443 const Shader* const shader = shaders[stage]; 343 const Shader* const shader = shaders[stage];
444 if (shader) { 344 if (!shader) {
445 const auto base = device.GetBaseBindings(stage);
446 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
447 texture_index, image_index);
448 }
449 }
450}
451
452std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
453 const auto& regs = maxwell3d.regs;
454
455 std::size_t size = 0;
456 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
457 if (!regs.vertex_array[index].IsEnabled())
458 continue; 345 continue;
459 346 }
460 const GPUVAddr start = regs.vertex_array[index].StartAddress(); 347 buffer_cache.BindHostStageBuffers(stage);
461 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 348 const auto& base = device.GetBaseBindings(stage);
462 349 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
463 size += end - start; 350 texture_index, image_index);
464 ASSERT(end >= start);
465 } 351 }
466
467 return size;
468}
469
470std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
471 return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
472 static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
473} 352}
474 353
475void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, 354void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -478,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
478} 357}
479 358
480void RasterizerOpenGL::Clear() { 359void RasterizerOpenGL::Clear() {
360 MICROPROFILE_SCOPE(OpenGL_Clears);
481 if (!maxwell3d.ShouldExecute()) { 361 if (!maxwell3d.ShouldExecute()) {
482 return; 362 return;
483 } 363 }
@@ -528,11 +408,9 @@ void RasterizerOpenGL::Clear() {
528 } 408 }
529 UNIMPLEMENTED_IF(regs.clear_flags.viewport); 409 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
530 410
531 { 411 std::scoped_lock lock{texture_cache.mutex};
532 auto lock = texture_cache.AcquireLock(); 412 texture_cache.UpdateRenderTargets(true);
533 texture_cache.UpdateRenderTargets(true); 413 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
534 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
535 }
536 414
537 if (use_color) { 415 if (use_color) {
538 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); 416 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -544,7 +422,6 @@ void RasterizerOpenGL::Clear() {
544 } else if (use_stencil) { 422 } else if (use_stencil) {
545 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 423 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
546 } 424 }
547
548 ++num_queued_commands; 425 ++num_queued_commands;
549} 426}
550 427
@@ -553,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
553 430
554 query_cache.UpdateCounters(); 431 query_cache.UpdateCounters();
555 432
556 SyncViewport(); 433 SyncState();
557 SyncRasterizeEnable();
558 SyncPolygonModes();
559 SyncColorMask();
560 SyncFragmentColorClampState();
561 SyncMultiSampleState();
562 SyncDepthTestState();
563 SyncDepthClamp();
564 SyncStencilTestState();
565 SyncBlendState();
566 SyncLogicOpState();
567 SyncCullMode();
568 SyncPrimitiveRestart();
569 SyncScissorTest();
570 SyncPointState();
571 SyncLineState();
572 SyncPolygonOffset();
573 SyncAlphaTest();
574 SyncFramebufferSRGB();
575
576 buffer_cache.Acquire();
577 current_cbuf = 0;
578
579 std::size_t buffer_size = CalculateVertexArraysSize();
580
581 // Add space for index buffer
582 if (is_indexed) {
583 buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
584 }
585
586 // Uniform space for the 5 shader stages
587 buffer_size =
588 Common::AlignUp<std::size_t>(buffer_size, 4) +
589 (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
590
591 // Add space for at least 18 constant buffers
592 buffer_size += Maxwell::MaxConstBuffers *
593 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
594
595 // Prepare the vertex array.
596 buffer_cache.Map(buffer_size);
597
598 // Prepare vertex array format.
599 SetupVertexFormat();
600
601 // Upload vertex and index data.
602 SetupVertexBuffer();
603 SetupVertexInstances();
604 GLintptr index_buffer_offset = 0;
605 if (is_indexed) {
606 index_buffer_offset = SetupIndexBuffer();
607 }
608
609 // Setup emulation uniform buffer.
610 if (!device.UseAssemblyShaders()) {
611 MaxwellUniformData ubo;
612 ubo.SetFromRegs(maxwell3d);
613 const auto info =
614 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
615 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
616 static_cast<GLsizeiptr>(sizeof(ubo)));
617 }
618 434
619 // Setup shaders and their used resources. 435 // Setup shaders and their used resources.
620 auto lock = texture_cache.AcquireLock(); 436 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
621 SetupShaders(); 437 SetupShaders(is_indexed);
622 438
623 // Signal the buffer cache that we are not going to upload more things.
624 buffer_cache.Unmap();
625 texture_cache.UpdateRenderTargets(false); 439 texture_cache.UpdateRenderTargets(false);
626 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); 440 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
627 program_manager.BindGraphicsPipeline(); 441 program_manager.BindGraphicsPipeline();
@@ -635,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
635 if (is_indexed) { 449 if (is_indexed) {
636 const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); 450 const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
637 const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); 451 const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
638 const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); 452 const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
639 const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); 453 const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
640 if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { 454 if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
641 glDrawElements(primitive_mode, num_vertices, format, offset); 455 glDrawElements(primitive_mode, num_vertices, format, offset);
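
Draw now takes a std::scoped_lock over both cache mutexes instead of the old texture_cache.AcquireLock() helper. When given multiple mutexes, scoped_lock acquires them with std::lock's deadlock-avoidance algorithm and releases them on scope exit, so two threads locking the pair in different orders cannot deadlock. A minimal sketch (the mutex names are illustrative):

#include <mutex>

std::mutex buffer_cache_mutex;
std::mutex texture_cache_mutex;

void DrawSketch() {
    // Acquired together under deadlock-avoidance ordering; a thread that
    // names them in the opposite order is still safe.
    std::scoped_lock lock{buffer_cache_mutex, texture_cache_mutex};
    // ... update buffer and texture caches while both are held ...
}   // released here, in reverse order of acquisition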
@@ -675,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
675} 489}
676 490
677void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 491void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
678 buffer_cache.Acquire();
679 current_cbuf = 0;
680
681 Shader* const kernel = shader_cache.GetComputeKernel(code_addr); 492 Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
682 493
683 auto lock = texture_cache.AcquireLock(); 494 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
684 BindComputeTextures(kernel); 495 BindComputeTextures(kernel);
685 496
686 const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * 497 const auto& entries = kernel->GetEntries();
687 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 498 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
688 buffer_cache.Map(buffer_size); 499 buffer_cache.UnbindComputeStorageBuffers();
689 500 u32 ssbo_index = 0;
690 SetupComputeConstBuffers(kernel); 501 for (const auto& buffer : entries.global_memory_entries) {
691 SetupComputeGlobalMemory(kernel); 502 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
692 503 buffer.is_written);
693 buffer_cache.Unmap(); 504 ++ssbo_index;
505 }
506 buffer_cache.UpdateComputeBuffers();
507 buffer_cache.BindHostComputeBuffers();
694 508
695 const auto& launch_desc = kepler_compute.launch_description; 509 const auto& launch_desc = kepler_compute.launch_description;
696 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 510 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -706,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
706 query_cache.Query(gpu_addr, type, timestamp); 520 query_cache.Query(gpu_addr, type, timestamp);
707} 521}
708 522
523void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
524 u32 size) {
525 std::scoped_lock lock{buffer_cache.mutex};
526 buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
527}
528
709void RasterizerOpenGL::FlushAll() {} 529void RasterizerOpenGL::FlushAll() {}
710 530
711void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 531void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -714,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
714 return; 534 return;
715 } 535 }
716 { 536 {
717 auto lock = texture_cache.AcquireLock(); 537 std::scoped_lock lock{texture_cache.mutex};
718 texture_cache.DownloadMemory(addr, size); 538 texture_cache.DownloadMemory(addr, size);
719 } 539 }
720 buffer_cache.FlushRegion(addr, size); 540 {
541 std::scoped_lock lock{buffer_cache.mutex};
542 buffer_cache.DownloadMemory(addr, size);
543 }
721 query_cache.FlushRegion(addr, size); 544 query_cache.FlushRegion(addr, size);
722} 545}
723 546
724bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { 547bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
548 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
725 if (!Settings::IsGPULevelHigh()) { 549 if (!Settings::IsGPULevelHigh()) {
726 return buffer_cache.MustFlushRegion(addr, size); 550 return buffer_cache.IsRegionGpuModified(addr, size);
727 } 551 }
728 return texture_cache.IsRegionGpuModified(addr, size) || 552 return texture_cache.IsRegionGpuModified(addr, size) ||
729 buffer_cache.MustFlushRegion(addr, size); 553 buffer_cache.IsRegionGpuModified(addr, size);
730} 554}
731 555
732void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 556void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -735,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
735 return; 559 return;
736 } 560 }
737 { 561 {
738 auto lock = texture_cache.AcquireLock(); 562 std::scoped_lock lock{texture_cache.mutex};
739 texture_cache.WriteMemory(addr, size); 563 texture_cache.WriteMemory(addr, size);
740 } 564 }
565 {
566 std::scoped_lock lock{buffer_cache.mutex};
567 buffer_cache.WriteMemory(addr, size);
568 }
741 shader_cache.InvalidateRegion(addr, size); 569 shader_cache.InvalidateRegion(addr, size);
742 buffer_cache.InvalidateRegion(addr, size);
743 query_cache.InvalidateRegion(addr, size); 570 query_cache.InvalidateRegion(addr, size);
744} 571}
745 572
@@ -748,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
748 if (addr == 0 || size == 0) { 575 if (addr == 0 || size == 0) {
749 return; 576 return;
750 } 577 }
578 shader_cache.OnCPUWrite(addr, size);
751 { 579 {
752 auto lock = texture_cache.AcquireLock(); 580 std::scoped_lock lock{texture_cache.mutex};
753 texture_cache.WriteMemory(addr, size); 581 texture_cache.WriteMemory(addr, size);
754 } 582 }
755 shader_cache.OnCPUWrite(addr, size); 583 {
756 buffer_cache.OnCPUWrite(addr, size); 584 std::scoped_lock lock{buffer_cache.mutex};
585 buffer_cache.CachedWriteMemory(addr, size);
586 }
757} 587}
758 588
759void RasterizerOpenGL::SyncGuestHost() { 589void RasterizerOpenGL::SyncGuestHost() {
760 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 590 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
761 buffer_cache.SyncGuestHost();
762 shader_cache.SyncGuestHost(); 591 shader_cache.SyncGuestHost();
592 {
593 std::scoped_lock lock{buffer_cache.mutex};
594 buffer_cache.FlushCachedWrites();
595 }
763} 596}
764 597
765void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { 598void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
766 { 599 {
767 auto lock = texture_cache.AcquireLock(); 600 std::scoped_lock lock{texture_cache.mutex};
768 texture_cache.UnmapMemory(addr, size); 601 texture_cache.UnmapMemory(addr, size);
769 } 602 }
770 buffer_cache.OnCPUWrite(addr, size); 603 {
604 std::scoped_lock lock{buffer_cache.mutex};
605 buffer_cache.WriteMemory(addr, size);
606 }
771 shader_cache.OnCPUWrite(addr, size); 607 shader_cache.OnCPUWrite(addr, size);
772} 608}
773 609
@@ -802,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
802} 638}
803 639
804void RasterizerOpenGL::WaitForIdle() { 640void RasterizerOpenGL::WaitForIdle() {
805 // Place a barrier on everything that is not framebuffer related. 641 glMemoryBarrier(GL_ALL_BARRIER_BITS);
806 // This is related to another flag that is not currently implemented.
807 glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
808 GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
809 GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
810 GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
811 GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
812 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
813} 642}
814 643
815void RasterizerOpenGL::FragmentBarrier() { 644void RasterizerOpenGL::FragmentBarrier() {
@@ -834,18 +663,21 @@ void RasterizerOpenGL::TickFrame() {
834 num_queued_commands = 0; 663 num_queued_commands = 0;
835 664
836 fence_manager.TickFrame(); 665 fence_manager.TickFrame();
837 buffer_cache.TickFrame();
838 { 666 {
839 auto lock = texture_cache.AcquireLock(); 667 std::scoped_lock lock{texture_cache.mutex};
840 texture_cache.TickFrame(); 668 texture_cache.TickFrame();
841 } 669 }
670 {
671 std::scoped_lock lock{buffer_cache.mutex};
672 buffer_cache.TickFrame();
673 }
842} 674}
843 675
844bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 676bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
845 const Tegra::Engines::Fermi2D::Surface& dst, 677 const Tegra::Engines::Fermi2D::Surface& dst,
846 const Tegra::Engines::Fermi2D::Config& copy_config) { 678 const Tegra::Engines::Fermi2D::Config& copy_config) {
847 MICROPROFILE_SCOPE(OpenGL_Blits); 679 MICROPROFILE_SCOPE(OpenGL_Blits);
848 auto lock = texture_cache.AcquireLock(); 680 std::scoped_lock lock{texture_cache.mutex};
849 texture_cache.BlitImage(dst, src, copy_config); 681 texture_cache.BlitImage(dst, src, copy_config);
850 return true; 682 return true;
851} 683}
@@ -857,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
857 } 689 }
858 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 690 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
859 691
860 auto lock = texture_cache.AcquireLock(); 692 std::scoped_lock lock{texture_cache.mutex};
861 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; 693 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
862 if (!image_view) { 694 if (!image_view) {
863 return false; 695 return false;
@@ -924,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
924 } 756 }
925} 757}
926 758
927void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
928 static constexpr std::array PARAMETER_LUT{
929 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
930 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
931 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
932 };
933 MICROPROFILE_SCOPE(OpenGL_UBO);
934 const auto& stages = maxwell3d.state.shader_stages;
935 const auto& shader_stage = stages[stage_index];
936 const auto& entries = shader->GetEntries();
937 const bool use_unified = entries.use_unified_uniforms;
938 const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
939
940 const auto base_bindings = device.GetBaseBindings(stage_index);
941 u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
942 for (const auto& entry : entries.const_buffers) {
943 const u32 index = entry.GetIndex();
944 const auto& buffer = shader_stage.const_buffers[index];
945 SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
946 base_unified_offset + index * Maxwell::MaxConstBufferSize);
947 ++binding;
948 }
949 if (use_unified) {
950 const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
951 entries.global_memory_entries.size());
952 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
953 base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
954 }
955}
956
957void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
958 MICROPROFILE_SCOPE(OpenGL_UBO);
959 const auto& launch_desc = kepler_compute.launch_description;
960 const auto& entries = kernel->GetEntries();
961 const bool use_unified = entries.use_unified_uniforms;
962
963 u32 binding = 0;
964 for (const auto& entry : entries.const_buffers) {
965 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
966 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
967 Tegra::Engines::ConstBufferInfo buffer;
968 buffer.address = config.Address();
969 buffer.size = config.size;
970 buffer.enabled = mask[entry.GetIndex()];
971 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
972 use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
973 ++binding;
974 }
975 if (use_unified) {
976 const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
977 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
978 NUM_CONST_BUFFERS_BYTES_PER_STAGE);
979 }
980}
981
982void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
983 const Tegra::Engines::ConstBufferInfo& buffer,
984 const ConstBufferEntry& entry, bool use_unified,
985 std::size_t unified_offset) {
986 if (!buffer.enabled) {
987 // Set values to zero to unbind buffers
988 if (device.UseAssemblyShaders()) {
989 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
990 } else {
991 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
992 }
993 return;
994 }
995
996 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
997 // UBO alignment requirements.
998 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
999
1000 const bool fast_upload = !use_unified && device.HasFastBufferSubData();
1001
1002 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
1003 const GPUVAddr gpu_addr = buffer.address;
1004 auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
1005
1006 if (device.UseAssemblyShaders()) {
1007 UNIMPLEMENTED_IF(use_unified);
1008 if (info.offset != 0) {
1009 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
1010 glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
1011 info.handle = staging_cbuf;
1012 info.offset = 0;
1013 }
1014 glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
1015 return;
1016 }
1017
1018 if (use_unified) {
1019 glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
1020 unified_offset, size);
1021 } else {
1022 glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
1023 }
1024}
1025
1026void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
1027 static constexpr std::array TARGET_LUT = {
1028 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1029 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1030 };
1031 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1032 const auto& entries{shader->GetEntries().global_memory_entries};
1033
1034 std::array<BindlessSSBO, 32> ssbos;
1035 ASSERT(entries.size() < ssbos.size());
1036
1037 const bool assembly_shaders = device.UseAssemblyShaders();
1038 u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
1039 for (const auto& entry : entries) {
1040 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
1041 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1042 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1043 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1044 ++binding;
1045 }
1046 if (assembly_shaders) {
1047 UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
1048 }
1049}
1050
1051void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1052 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1053 const auto& entries{kernel->GetEntries().global_memory_entries};
1054
1055 std::array<BindlessSSBO, 32> ssbos;
1056 ASSERT(entries.size() < ssbos.size());
1057
1058 u32 binding = 0;
1059 for (const auto& entry : entries) {
1060 const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
1061 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1062 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1063 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1064 ++binding;
1065 }
1066 if (device.UseAssemblyShaders()) {
1067 UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
1068 }
1069}
1070
1071void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1072 GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
1073 const size_t alignment{device.GetShaderStorageBufferAlignment()};
1074 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1075 if (device.UseAssemblyShaders()) {
1076 *ssbo = BindlessSSBO{
1077 .address = static_cast<GLuint64EXT>(info.address + info.offset),
1078 .length = static_cast<GLsizei>(size),
1079 .padding = 0,
1080 };
1081 } else {
1082 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1083 static_cast<GLsizeiptr>(size));
1084 }
1085}
1086
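In both global-memory loops below, each entry's descriptor is fetched from guest const-buffer memory: a 64-bit GPU address, then a 32-bit size 8 bytes later. Laid out as a struct purely for illustration (the emulator reads the fields individually):

    // Descriptor layout implied by the reads below; not a real type in this code.
    struct GlobalMemoryDescriptor {
        u64 gpu_addr; // gpu_memory.Read<u64>(addr)
        u32 size;     // gpu_memory.Read<u32>(addr + 8)
    };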
1087 759 void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
1088 760 const bool via_header_index =
1089 761 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1131,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
1131 803 }
1132 804 }
1133 805
806void RasterizerOpenGL::SyncState() {
807 SyncViewport();
808 SyncRasterizeEnable();
809 SyncPolygonModes();
810 SyncColorMask();
811 SyncFragmentColorClampState();
812 SyncMultiSampleState();
813 SyncDepthTestState();
814 SyncDepthClamp();
815 SyncStencilTestState();
816 SyncBlendState();
817 SyncLogicOpState();
818 SyncCullMode();
819 SyncPrimitiveRestart();
820 SyncScissorTest();
821 SyncPointState();
822 SyncLineState();
823 SyncPolygonOffset();
824 SyncAlphaTest();
825 SyncFramebufferSRGB();
826 SyncVertexFormats();
827 SyncVertexInstances();
828}
829
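The new SyncState simply fans out to the individual Sync* helpers; each helper is guarded by a dirty flag so redundant GL calls are skipped. A sketch of the typical shape, modeled on the existing helpers (exact bodies vary per state):

    void RasterizerOpenGL::SyncRasterizeEnable() {
        auto& flags = maxwell3d.dirty.flags;
        if (!flags[Dirty::RasterizeEnable]) {
            return; // guest register unchanged since the last draw
        }
        flags[Dirty::RasterizeEnable] = false;
        oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
    }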
1134 830 void RasterizerOpenGL::SyncViewport() {
1135 831 auto& flags = maxwell3d.dirty.flags;
1136 832 const auto& regs = maxwell3d.regs;
@@ -1166,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() {
1166 862 if (regs.screen_y_control.y_negate != 0) {
1167 863 flip_y = !flip_y;
1168 864 }
1169 glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
1170 regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
1171 : GL_NEGATIVE_ONE_TO_ONE);
865 const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
866 const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
867 const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
868 state_tracker.ClipControl(origin, depth);
869 state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
1172 870 }
1173 871
1174 872 if (dirty_viewport) {
@@ -1652,36 +1350,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1652 1350 if (regs.tfb_enabled == 0) {
1653 1351 return;
1654 1352 }
1655
1656 1353 if (device.UseAssemblyShaders()) {
1657 1354 SyncTransformFeedback();
1658 1355 }
1659
1660 1356 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1661 1357 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1662 1358 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
1663
1359 UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
1664 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1665 const auto& binding = regs.tfb_bindings[index];
1666 if (!binding.buffer_enable) {
1667 if (enabled_transform_feedback_buffers[index]) {
1668 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
1669 0);
1670 }
1671 enabled_transform_feedback_buffers[index] = false;
1672 continue;
1673 }
1674 enabled_transform_feedback_buffers[index] = true;
1675
1676 auto& tfb_buffer = transform_feedback_buffers[index];
1677 tfb_buffer.Create();
1678
1679 const GLuint handle = tfb_buffer.handle;
1680 const std::size_t size = binding.buffer_size;
1681 glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
1682 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
1683 static_cast<GLsizeiptr>(size));
1684 }
1685 1360
1686 1361 // We may have to call BeginTransformFeedbackNV here since they seem to call different
1687 1362 // implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1695,23 +1370,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
1695 1370 if (regs.tfb_enabled == 0) {
1696 1371 return;
1697 1372 }
1698
1699 1373 glEndTransformFeedback();
1700
1701 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1702 const auto& binding = regs.tfb_bindings[index];
1703 if (!binding.buffer_enable) {
1704 continue;
1705 }
1706 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
1707
1708 const GLuint handle = transform_feedback_buffers[index].handle;
1709 const GPUVAddr gpu_addr = binding.Address();
1710 const std::size_t size = binding.buffer_size;
1711 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1712 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1713 static_cast<GLsizeiptr>(size));
1714 }
1715 1374 }
1716 1375
1717 1376 } // namespace OpenGL
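The deleted loops above allocated one scratch OGLBuffer per transform feedback binding and copied results back into guest memory after glEndTransformFeedback. With this change those bindings are expected to be owned by the rewritten buffer cache instead. For reference, the removed per-binding write-back amounted to this (lifted from the deleted lines):

    // Old write-back, per enabled binding:
    const auto info = buffer_cache.UploadMemory(binding.Address(), binding.buffer_size, 4, true);
    glCopyNamedBufferSubData(tfb_buffer.handle, info.handle, 0, info.offset,
                             static_cast<GLsizeiptr>(binding.buffer_size));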
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 82e03e677..3745cf637 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,6 @@
30 30 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
31 31 #include "video_core/renderer_opengl/gl_shader_manager.h"
32 32 #include "video_core/renderer_opengl/gl_state_tracker.h"
33 #include "video_core/renderer_opengl/gl_stream_buffer.h"
34 33 #include "video_core/renderer_opengl/gl_texture_cache.h"
35 34 #include "video_core/shader/async_shaders.h"
36 35 #include "video_core/textures/texture.h"
@@ -72,6 +71,7 @@ public:
72 71 void DispatchCompute(GPUVAddr code_addr) override;
73 72 void ResetCounter(VideoCore::QueryType type) override;
74 73 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
74 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
75 75 void FlushAll() override;
76 76 void FlushRegion(VAddr addr, u64 size) override;
77 77 bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -119,27 +119,6 @@ private:
119 119 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
120 120 size_t& image_view_index, size_t& texture_index, size_t& image_index);
121 121
122 /// Configures the current constbuffers to use for the draw command.
123 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
124
125 /// Configures the current constbuffers to use for the kernel invocation.
126 void SetupComputeConstBuffers(Shader* kernel);
127
128 /// Configures a constant buffer.
129 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
130 const ConstBufferEntry& entry, bool use_unified,
131 std::size_t unified_offset);
132
133 /// Configures the current global memory entries to use for the draw command.
134 void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
135
136 /// Configures the current global memory entries to use for the kernel invocation.
137 void SetupComputeGlobalMemory(Shader* kernel);
138
139 /// Configures a global memory buffer.
140 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
141 size_t size, BindlessSSBO* ssbo);
142
143 122 /// Configures the current textures to use for the draw command.
144 123 void SetupDrawTextures(const Shader* shader, size_t stage_index);
145 124
@@ -152,6 +131,9 @@ private:
152 131 /// Configures images in a compute shader.
153 132 void SetupComputeImages(const Shader* shader);
154 133
134 /// Syncs state to match guest's
135 void SyncState();
136
155 137 /// Syncs the viewport and depth range to match the guest state
156 138 void SyncViewport();
157 139
@@ -215,6 +197,12 @@ private:
215 197 /// Syncs the framebuffer sRGB state to match the guest state
216 198 void SyncFramebufferSRGB();
217 199
200 /// Syncs vertex formats to match the guest state
201 void SyncVertexFormats();
202
203 /// Syncs vertex instances to match the guest state
204 void SyncVertexInstances();
205
218 206 /// Syncs transform feedback state to match guest state
219 207 /// @note Only valid on assembly shaders
220 208 void SyncTransformFeedback();
@@ -225,19 +213,7 @@ private:
225 213 /// End a transform feedback
226 214 void EndTransformFeedback();
227 215
228 std::size_t CalculateVertexArraysSize() const;
216 void SetupShaders(bool is_indexed);
229
230 std::size_t CalculateIndexBufferSize() const;
231
232 /// Updates the current vertex format
233 void SetupVertexFormat();
234
235 void SetupVertexBuffer();
236 void SetupVertexInstances();
237
238 GLintptr SetupIndexBuffer();
239
240 void SetupShaders();
241 217
242 218 Tegra::GPU& gpu;
243 219 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +225,12 @@ private:
249 225 ProgramManager& program_manager;
250 226 StateTracker& state_tracker;
251 227
252 OGLStreamBuffer stream_buffer;
253 228 TextureCacheRuntime texture_cache_runtime;
254 229 TextureCache texture_cache;
230 BufferCacheRuntime buffer_cache_runtime;
231 BufferCache buffer_cache;
255 232 ShaderCacheOpenGL shader_cache;
256 233 QueryCache query_cache;
257 OGLBufferCache buffer_cache;
258 234 FenceManagerOpenGL fence_manager;
259 235
260 236 VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +238,8 @@ private:
262 238 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
263 239 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
264 240 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
265 std::array<GLuint, MAX_TEXTURES> texture_handles;
241 std::array<GLuint, MAX_TEXTURES> texture_handles{};
266 std::array<GLuint, MAX_IMAGES> image_handles;
242 std::array<GLuint, MAX_IMAGES> image_handles{};
267
268 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
269 transform_feedback_buffers;
270 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
271 enabled_transform_feedback_buffers;
272
273 static constexpr std::size_t NUM_CONSTANT_BUFFERS =
274 Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
275 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
276 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
277 std::size_t current_cbuf = 0;
278 OGLBuffer unified_uniform_buffer;
279 243
280 244 /// Number of commands queued to the OpenGL driver. Reset on flush.
281 245 std::size_t num_queued_commands = 0;
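One detail worth noting in the member list above: buffer_cache_runtime is declared before buffer_cache, and C++ constructs members in declaration order, so the runtime exists before the cache that presumably holds a reference to it. A sketch of the constraint:

    BufferCacheRuntime buffer_cache_runtime; // constructed first
    BufferCache buffer_cache;                // may safely keep a reference to the runtime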
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0e34a0f20..3428e5e21 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -171,12 +171,6 @@ void OGLBuffer::Release() {
171 handle = 0; 171 handle = 0;
172} 172}
173 173
174void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
175 ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
176
177 glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
178}
179
180 174 void OGLSync::Create() {
181 175 if (handle != 0)
182 176 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index f48398669..552d79db4 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -234,9 +234,6 @@ public:
234 234 /// Deletes the internal OpenGL resource
235 235 void Release();
236 236
237 // Converts the buffer into a stream copy buffer with a fixed size
238 void MakeStreamCopy(std::size_t buffer_size);
239
240 237 GLuint handle = 0;
241 238 };
242 239
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c35b71b6b..ac78d344c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
64 64 constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65 65 constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66 66
67 constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
67 constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
68 68 #define ftou floatBitsToUint
69 69 #define itof intBitsToFloat
70 70 #define utof uintBitsToFloat
@@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
77 77
78 78 const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f );
79 79 const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f );
80
81layout (std140, binding = {}) uniform vs_config {{
82 float y_direction;
83}};
84 80 )";
85 81
86 82 class ShaderWriter final {
@@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
402 398 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
403 399 }
404 400
405bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
406 const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
407 // We waste one UBO for emulation
408 const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
409 return num_ubos > num_available_ubos;
410}
411
412 401 struct GenericVaryingDescription {
413 402 std::string name;
414 403 u8 first_element = 0;
@@ -420,9 +409,8 @@ public:
420 409 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
421 410 ShaderType stage_, std::string_view identifier_,
422 411 std::string_view suffix_)
423 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_},
424 suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{
425 UseUnifiedUniforms(device_, ir_, stage_)} {
412 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
413 identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
426 414 if (stage != ShaderType::Compute) {
427 415 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
428 416 }
@@ -516,7 +504,8 @@ private:
516 504 if (!identifier.empty()) {
517 505 code.AddLine("// {}", identifier);
518 506 }
519 code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
507 const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
508 code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
520 509 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
521 510 if (device.HasShaderBallot()) {
522 511 code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -542,7 +531,7 @@ private:
542 531
543 532 code.AddNewLine();
544 533
545 code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
534 code.AddLine(COMMON_DECLARATIONS);
546 535 }
547 536
548 537 void DeclareVertex() {
@@ -865,17 +854,6 @@ private:
865 854 }
866 855
867 856 void DeclareConstantBuffers() {
868 if (use_unified_uniforms) {
869 const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
870 static_cast<u32>(ir.GetGlobalMemory().size());
871 code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
872 binding);
873 code.AddLine(" uint cbufs[];");
874 code.AddLine("}};");
875 code.AddNewLine();
876 return;
877 }
878
879 857 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
880 858 for (const auto& [index, info] : ir.GetConstantBuffers()) {
881 859 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
@@ -1081,29 +1059,17 @@ private:
1081 1059
1082 1060 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1083 1061 const Node offset = cbuf->GetOffset();
1084 const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
1085 1062
1086 1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1087 1064 // Direct access
1088 1065 const u32 offset_imm = immediate->GetValue();
1089 1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1090 if (use_unified_uniforms) {
1091 return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
1092 Type::Uint};
1093 } else {
1094 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1095 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1096 Type::Uint};
1097 }
1098 }
1099
1100 // Indirect access
1101 if (use_unified_uniforms) {
1102 return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
1103 Visit(offset).AsUint()),
1067 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1068 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1104 1069 Type::Uint};
1105 1070 }
1106 1071
1072 // Indirect access
1107 1073 const std::string final_offset = code.GenerateTemporary();
1108 1074 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1109 1075
@@ -2293,7 +2259,6 @@ private:
2293 2259 }
2294 2260 }
2295 2261 }
2296
2297 2262 if (header.ps.omap.depth) {
2298 2263 // The depth output is always 2 registers after the last color output, and current_reg
2299 2264 // already contains one past the last color register.
@@ -2337,7 +2302,8 @@ private:
2337 2302 }
2338 2303
2339 2304 Expression YNegate(Operation operation) {
2340 return {"y_direction", Type::Float};
2305 // Y_NEGATE is mapped to this uniform value
2306 return {"gl_FrontMaterial.ambient.a", Type::Float};
2341 2307 }
2342 2308
2343 2309 template <u32 element>
@@ -2787,7 +2753,6 @@ private:
2787 2753 const std::string_view identifier;
2788 2754 const std::string_view suffix;
2789 2755 const Header header;
2790 const bool use_unified_uniforms;
2791 2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2792 2757
2793 2758 ShaderWriter code;
@@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s
3003 2968 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
3004 2969 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
3005 2970 }
2971 for (const auto& buffer : entries.const_buffers) {
2972 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
2973 }
3006 2974 entries.shader_length = ir.GetLength();
3007 entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
3008 2975 return entries;
3009 2976 }
3010 2977
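enabled_uniform_buffers replaces the old use_unified_uniforms boolean with a bitmask of the const buffer indices a shader actually uses. A consumer can walk the set bits like this; a sketch only, and std::countr_zero needs the C++20 <bit> header:

    for (u32 mask = entries.enabled_uniform_buffers; mask != 0; mask &= mask - 1) {
        const u32 index = static_cast<u32>(std::countr_zero(mask)); // lowest set bit
        // bind uniform buffer `index` for this stage ...
    }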
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index be68994bb..0397a000c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -55,7 +55,7 @@ struct ShaderEntries {
55 55 std::vector<ImageEntry> images;
56 56 std::size_t shader_length{};
57 57 u32 clip_distances{};
58 bool use_unified_uniforms{};
58 u32 enabled_uniform_buffers{};
59 59 };
60 60
61 61 ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 60e6fa39f..dbdf5230f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) {
36 36 FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
37 37 }
38 38
39 void SetupDirtyVertexArrays(Tables& tables) {
39 void SetupDirtyVertexInstances(Tables& tables) {
40 static constexpr std::size_t num_array = 3;
41 40 static constexpr std::size_t instance_base_offset = 3;
42 41 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
43 42 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
44 const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
45
46 FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
47 FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
48
49 43 const std::size_t instance_array_offset = array_offset + instance_base_offset;
50 44 tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
51 45 tables[1][instance_array_offset] = VertexInstances;
@@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) {
217 211 StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
218 212 auto& dirty = gpu.Maxwell3D().dirty;
219 213 auto& tables = dirty.tables;
220 SetupDirtyRenderTargets(tables);
214 SetupDirtyFlags(tables);
221 215 SetupDirtyColorMasks(tables);
222 216 SetupDirtyViewports(tables);
223 217 SetupDirtyScissors(tables);
224 SetupDirtyVertexArrays(tables);
218 SetupDirtyVertexInstances(tables);
225 219 SetupDirtyVertexFormat(tables);
226 220 SetupDirtyShaders(tables);
227 221 SetupDirtyPolygonModes(tables);
@@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
241 235 SetupDirtyClipControl(tables);
242 236 SetupDirtyDepthClampEnabled(tables);
243 237 SetupDirtyMisc(tables);
244
245 auto& store = dirty.on_write_stores;
246 store[VertexBuffers] = true;
247 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
248 store[VertexBuffer0 + i] = true;
249 }
250}
251
252void StateTracker::InvalidateStreamBuffer() {
253 flags[Dirty::VertexBuffers] = true;
254 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
255 flags[index] = true;
256 }
257 238 }
258 239
259 240 } // namespace OpenGL
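For readers unfamiliar with the dirty-table scheme touched above: table 0 maps a guest register offset to a fine-grained flag and table 1 maps the same offset to an aggregate flag, so one register write raises both. Roughly, with names from the diff and semantics inferred:

    // One guest write at `instance_array_offset` marks both flags dirty:
    tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); // per-array flag
    tables[1][instance_array_offset] = VertexInstances;                      // "any array changed"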
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 574615d3c..94c905116 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -28,10 +28,6 @@ enum : u8 {
28 28 VertexFormat0,
29 29 VertexFormat31 = VertexFormat0 + 31,
30 30
31 VertexBuffers,
32 VertexBuffer0,
33 VertexBuffer31 = VertexBuffer0 + 31,
34
35 31 VertexInstances,
36 32 VertexInstance0,
37 33 VertexInstance31 = VertexInstance0 + 31,
@@ -92,8 +88,6 @@ class StateTracker {
92 88 public:
93 89 explicit StateTracker(Tegra::GPU& gpu);
94 90
95 void InvalidateStreamBuffer();
96
97 91 void BindIndexBuffer(GLuint new_index_buffer) {
98 92 if (index_buffer == new_index_buffer) {
99 93 return;
@@ -110,13 +104,32 @@ public:
110 104 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
111 105 }
112 106
107 void ClipControl(GLenum new_origin, GLenum new_depth) {
108 if (new_origin == origin && new_depth == depth) {
109 return;
110 }
111 origin = new_origin;
112 depth = new_depth;
113 glClipControl(origin, depth);
114 }
115
116 void SetYNegate(bool new_y_negate) {
117 if (new_y_negate == y_negate) {
118 return;
119 }
120 // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
121 y_negate = new_y_negate;
122 const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
123 glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
124 }
125
113 126 void NotifyScreenDrawVertexArray() {
114 127 flags[OpenGL::Dirty::VertexFormats] = true;
115 128 flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
116 129 flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
117 130
118 flags[OpenGL::Dirty::VertexBuffers] = true;
131 flags[VideoCommon::Dirty::VertexBuffers] = true;
119 flags[OpenGL::Dirty::VertexBuffer0] = true;
132 flags[VideoCommon::Dirty::VertexBuffer0] = true;
120 133
121 134 flags[OpenGL::Dirty::VertexInstances] = true;
122 135 flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
@@ -202,6 +215,9 @@ private:
202 215
203 216 GLuint framebuffer = 0;
204 217 GLuint index_buffer = 0;
218 GLenum origin = GL_LOWER_LEFT;
219 GLenum depth = GL_NEGATIVE_ONE_TO_ONE;
220 bool y_negate = false;
205 221 };
206 222
207 223 } // namespace OpenGL
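The Y_NEGATE plumbing above is worth spelling out: the decompiler now reads the flip factor from a fixed-function material slot instead of a dedicated emulation UBO, which is why UsesYNegate() forces a compatibility-profile shader. Host and shader sides pair up like this (host lines are from SetYNegate above; the GLSL line sketches what YNegate() emits):

    // Host: smuggle the flip factor into a legacy material slot.
    const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
    glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());

    // GLSL (compatibility profile), as read by the generated shader:
    //     float y_direction = gl_FrontMaterial.ambient.a;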
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e0819cdf2..77b3ee0fe 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -1,70 +1,64 @@
1 // Copyright 2018 Citra Emulator Project
1 // Copyright 2021 yuzu Emulator Project
2 2 // Licensed under GPLv2 or any later version
3 3 // Refer to the license.txt file included.
4 4
5 #include <tuple>
5 #include <array>
6 #include <vector>
6 #include <memory>
7 #include <span>
8
9 #include <glad/glad.h>
7 10
8 11 #include "common/alignment.h"
9 12 #include "common/assert.h"
10 #include "common/microprofile.h"
11 #include "video_core/renderer_opengl/gl_device.h"
12 #include "video_core/renderer_opengl/gl_state_tracker.h"
13 13 #include "video_core/renderer_opengl/gl_stream_buffer.h"
14 14
15 MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
16 MP_RGB(128, 128, 192));
17
18 15 namespace OpenGL {
19 16
20 OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
21     : state_tracker{state_tracker_} {
22     gl_buffer.Create();
23
24     static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
25     glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
26     mapped_ptr = static_cast<u8*>(
27         glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
28
29     if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
30         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
31         glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32     }
33 }
34
35 OGLStreamBuffer::~OGLStreamBuffer() {
36     glUnmapNamedBuffer(gl_buffer.handle);
37     gl_buffer.Release();
38 }
39
40 std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
41     ASSERT(size <= BUFFER_SIZE);
42     ASSERT(alignment <= BUFFER_SIZE);
43     mapped_size = size;
44
45     if (alignment > 0) {
46         buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
47     }
48
49     if (buffer_pos + size > BUFFER_SIZE) {
50         MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
51         glInvalidateBufferData(gl_buffer.handle);
52         state_tracker.InvalidateStreamBuffer();
53
54         buffer_pos = 0;
55     }
56
57     return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
58 }
59
60 void OGLStreamBuffer::Unmap(GLsizeiptr size) {
61     ASSERT(size <= mapped_size);
62
63     if (size > 0) {
64         glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
65     }
66
67     buffer_pos += size;
68 }
17 StreamBuffer::StreamBuffer() {
18     static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
19     buffer.Create();
20     glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
21     glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
22     mapped_pointer =
23         static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
24     for (OGLSync& sync : fences) {
25         sync.Create();
26     }
27 }
28
29 std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
30     ASSERT(size < REGION_SIZE);
31     for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
32          ++region) {
33         fences[region].Create();
34     }
35     used_iterator = iterator;
36
37     for (size_t region = Region(free_iterator) + 1,
38                 region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
39          region < region_end; ++region) {
40         glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
41         fences[region].Release();
42     }
43     if (iterator + size >= free_iterator) {
44         free_iterator = iterator + size;
45     }
46     if (iterator + size > STREAM_BUFFER_SIZE) {
47         for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
48             fences[region].Create();
49         }
50         used_iterator = 0;
51         iterator = 0;
52         free_iterator = size;
53
54         for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
55             glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
56             fences[region].Release();
57         }
58     }
59     const size_t offset = iterator;
60     iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
61     return {std::span(mapped_pointer + offset, size), offset};
62 }
69 63
70 64 } // namespace OpenGL
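The replacement StreamBuffer is a 64 MiB ring, persistently and coherently mapped, split into 16 fenced regions of 4 MiB (REGION_SIZE); Request only waits on fences covering regions the GPU may still be reading, instead of orphaning the whole buffer. A usage sketch, with the destination names being illustrative:

    StreamBuffer stream;                              // mapped once at creation, coherent
    const auto [span, offset] = stream.Request(upload_size);
    std::memcpy(span.data(), src_data, upload_size);  // coherent map: no explicit flush
    glCopyNamedBufferSubData(stream.Handle(), dst_buffer, static_cast<GLintptr>(offset),
                             dst_offset, static_cast<GLsizeiptr>(upload_size));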
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index dd9cf67eb..6dbb6bfba 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -1,9 +1,12 @@
1 // Copyright 2018 Citra Emulator Project
1 // Copyright 2021 yuzu Emulator Project
2 2 // Licensed under GPLv2 or any later version
3 3 // Refer to the license.txt file included.
4 4
5 5 #pragma once
6 6
7 #include <array>
8 #include <memory>
9 #include <span>
7 10 #include <utility>
8 11
9 12 #include <glad/glad.h>
@@ -13,48 +16,35 @@
13 16
14 17 namespace OpenGL {
15 18
16 class Device;
17 class StateTracker;
18
19 class OGLStreamBuffer : private NonCopyable {
20 public:
21     explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
22     ~OGLStreamBuffer();
23
24     /*
25      * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
26      * and the optional alignment requirement.
27      * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
28      * The return values are the pointer to the new chunk, and the offset within the buffer.
29      * The actual used size must be specified on unmapping the chunk.
30      */
31     std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
32
33     void Unmap(GLsizeiptr size);
34
35     GLuint Handle() const {
36         return gl_buffer.handle;
37     }
38
39     u64 Address() const {
40         return gpu_address;
41     }
42
43     GLsizeiptr Size() const noexcept {
44         return BUFFER_SIZE;
45     }
46
47 private:
48     static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
49
50     StateTracker& state_tracker;
51
52     OGLBuffer gl_buffer;
53
54     GLuint64EXT gpu_address = 0;
55     GLintptr buffer_pos = 0;
56     GLsizeiptr mapped_size = 0;
57     u8* mapped_ptr = nullptr;
58 };
19 class StreamBuffer {
20     static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
21     static constexpr size_t NUM_SYNCS = 16;
22     static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
23     static constexpr size_t MAX_ALIGNMENT = 256;
24     static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0);
25     static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0);
26     static_assert(REGION_SIZE % MAX_ALIGNMENT == 0);
27
28 public:
29     explicit StreamBuffer();
30
31     [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept;
32
33     [[nodiscard]] GLuint Handle() const noexcept {
34         return buffer.handle;
35     }
36
37 private:
38     [[nodiscard]] static size_t Region(size_t offset) noexcept {
39         return offset / REGION_SIZE;
40     }
41
42     size_t iterator = 0;
43     size_t used_iterator = 0;
44     size_t free_iterator = 0;
45     u8* mapped_pointer = nullptr;
46     OGLBuffer buffer;
47     std::array<OGLSync, NUM_SYNCS> fences;
48 };
59 49
60 50 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 546cb6d00..12434db67 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
398 398
399 399 } // Anonymous namespace
400 400
401ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
402 : span(map, size), sync{sync_}, handle{handle_} {}
403
404 401 ImageBufferMap::~ImageBufferMap() {
405 402 if (sync) {
406 403 sync->Create();
@@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() {
487 484 glFinish();
488 485 }
489 486
490 ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
487 ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
491 488 return upload_buffers.RequestMap(size, true);
492 489 }
493 490
494 ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
491 ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
495 492 return download_buffers.RequestMap(size, false);
496 493 }
497 494
@@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
553} 550}
554 551
555void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, 552void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
556 size_t buffer_offset,
557 std::span<const SwizzleParameters> swizzles) { 553 std::span<const SwizzleParameters> swizzles) {
558 switch (image.info.type) { 554 switch (image.info.type) {
559 case ImageType::e2D: 555 case ImageType::e2D:
560 return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); 556 return util_shaders.BlockLinearUpload2D(image, map, swizzles);
561 case ImageType::e3D: 557 case ImageType::e3D:
562 return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); 558 return util_shaders.BlockLinearUpload3D(image, map, swizzles);
563 case ImageType::Linear: 559 case ImageType::Linear:
564 return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); 560 return util_shaders.PitchUpload(image, map, swizzles);
565 default: 561 default:
566 UNREACHABLE(); 562 UNREACHABLE();
567 break; 563 break;
@@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_
596 bool insert_fence) { 592 bool insert_fence) {
597 const size_t index = RequestBuffer(requested_size); 593 const size_t index = RequestBuffer(requested_size);
598 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; 594 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
599 return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); 595 return ImageBufferMap{
596 .mapped_span = std::span(maps[index], requested_size),
597 .sync = sync,
598 .buffer = buffers[index].handle,
599 };
600} 600}
601 601
602size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { 602size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
@@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
709 } 709 }
710} 710}
711 711
712void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 712void Image::UploadMemory(const ImageBufferMap& map,
713 std::span<const VideoCommon::BufferImageCopy> copies) { 713 std::span<const VideoCommon::BufferImageCopy> copies) {
714 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); 714 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
715 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); 715 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
716 716
717 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); 717 glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
718 718
@@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
728 current_image_height = copy.buffer_image_height; 728 current_image_height = copy.buffer_image_height;
729 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); 729 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
730 } 730 }
731 CopyBufferToImage(copy, buffer_offset); 731 CopyBufferToImage(copy, map.offset);
732 } 732 }
733} 733}
734 734
735void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 735void Image::UploadMemory(const ImageBufferMap& map,
736 std::span<const VideoCommon::BufferCopy> copies) { 736 std::span<const VideoCommon::BufferCopy> copies) {
737 for (const VideoCommon::BufferCopy& copy : copies) { 737 for (const VideoCommon::BufferCopy& copy : copies) {
738 glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, 738 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
739 copy.dst_offset, copy.size); 739 copy.dst_offset, copy.size);
740 } 740 }
741} 741}
742 742
743void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, 743void Image::DownloadMemory(ImageBufferMap& map,
744 std::span<const VideoCommon::BufferImageCopy> copies) { 744 std::span<const VideoCommon::BufferImageCopy> copies) {
745 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 745 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
746 746
747 glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); 747 glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
748 glPixelStorei(GL_PACK_ALIGNMENT, 1); 748 glPixelStorei(GL_PACK_ALIGNMENT, 1);
749 749
750 u32 current_row_length = std::numeric_limits<u32>::max(); 750 u32 current_row_length = std::numeric_limits<u32>::max();
@@ -759,7 +759,38 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
759 current_image_height = copy.buffer_image_height; 759 current_image_height = copy.buffer_image_height;
760 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); 760 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
761 } 761 }
762 CopyImageToBuffer(copy, buffer_offset); 762 CopyImageToBuffer(copy, map.offset);
763 }
764}
765
766GLuint Image::StorageHandle() noexcept {
767 switch (info.format) {
768 case PixelFormat::A8B8G8R8_SRGB:
769 case PixelFormat::B8G8R8A8_SRGB:
770 case PixelFormat::BC1_RGBA_SRGB:
771 case PixelFormat::BC2_SRGB:
772 case PixelFormat::BC3_SRGB:
773 case PixelFormat::BC7_SRGB:
774 case PixelFormat::ASTC_2D_4X4_SRGB:
775 case PixelFormat::ASTC_2D_8X8_SRGB:
776 case PixelFormat::ASTC_2D_8X5_SRGB:
777 case PixelFormat::ASTC_2D_5X4_SRGB:
778 case PixelFormat::ASTC_2D_5X5_SRGB:
779 case PixelFormat::ASTC_2D_10X8_SRGB:
780 case PixelFormat::ASTC_2D_6X6_SRGB:
781 case PixelFormat::ASTC_2D_10X10_SRGB:
782 case PixelFormat::ASTC_2D_12X12_SRGB:
783 case PixelFormat::ASTC_2D_8X6_SRGB:
784 case PixelFormat::ASTC_2D_6X5_SRGB:
785 if (store_view.handle != 0) {
786 return store_view.handle;
787 }
788 store_view.Create();
789 glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0,
790 info.resources.levels, 0, info.resources.layers);
791 return store_view.handle;
792 default:
793 return texture.handle;
763 } 794 }
764} 795}
765 796
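The new StorageHandle above exists because GL image load/store rejects sRGB internal formats: for sRGB images, a linear GL_RGBA8 texture view over the same storage is created lazily and substituted for writes. A sketch of the consuming side, under that assumption:

    // Bind for compute writes; sRGB images transparently go through the linear view.
    glBindImageTexture(binding, image.StorageHandle(), 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8);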
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 15b7c3676..a6172f009 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -31,23 +31,13 @@ using VideoCommon::NUM_RT;
31using VideoCommon::Offset2D; 31using VideoCommon::Offset2D;
32using VideoCommon::RenderTargets; 32using VideoCommon::RenderTargets;
33 33
34class ImageBufferMap { 34struct ImageBufferMap {
35public:
36 explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
37 ~ImageBufferMap(); 35 ~ImageBufferMap();
38 36
39 GLuint Handle() const noexcept { 37 std::span<u8> mapped_span;
40 return handle; 38 size_t offset = 0;
41 }
42
43 std::span<u8> Span() const noexcept {
44 return span;
45 }
46
47private:
48 std::span<u8> span;
49 OGLSync* sync; 39 OGLSync* sync;
50 GLuint handle; 40 GLuint buffer;
51}; 41};
52 42
53struct FormatProperties { 43struct FormatProperties {
@@ -69,9 +59,9 @@ public:
69 59
70 void Finish(); 60 void Finish();
71 61
72 ImageBufferMap MapUploadBuffer(size_t size); 62 ImageBufferMap UploadStagingBuffer(size_t size);
73 63
74 ImageBufferMap MapDownloadBuffer(size_t size); 64 ImageBufferMap DownloadStagingBuffer(size_t size);
75 65
76 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 66 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
77 67
@@ -89,7 +79,7 @@ public:
89 Tegra::Engines::Fermi2D::Filter filter, 79 Tegra::Engines::Fermi2D::Filter filter,
90 Tegra::Engines::Fermi2D::Operation operation); 80 Tegra::Engines::Fermi2D::Operation operation);
91 81
92 void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 82 void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
93 std::span<const VideoCommon::SwizzleParameters> swizzles); 83 std::span<const VideoCommon::SwizzleParameters> swizzles);
94 84
95 void InsertUploadMemoryBarrier(); 85 void InsertUploadMemoryBarrier();
@@ -148,14 +138,14 @@ public:
148 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 138 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
149 VAddr cpu_addr); 139 VAddr cpu_addr);
150 140
151 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 141 void UploadMemory(const ImageBufferMap& map,
152 std::span<const VideoCommon::BufferImageCopy> copies); 142 std::span<const VideoCommon::BufferImageCopy> copies);
153 143
154 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 144 void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
155 std::span<const VideoCommon::BufferCopy> copies); 145
146 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
156 147
157 void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, 148 GLuint StorageHandle() noexcept;
158 std::span<const VideoCommon::BufferImageCopy> copies);
159 149
160 GLuint Handle() const noexcept { 150 GLuint Handle() const noexcept {
161 return texture.handle; 151 return texture.handle;
@@ -167,8 +157,8 @@ private:
167 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); 157 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
168 158
169 OGLTexture texture; 159 OGLTexture texture;
170 OGLTextureView store_view;
171 OGLBuffer buffer; 160 OGLBuffer buffer;
161 OGLTextureView store_view;
172 GLenum gl_internal_format = GL_NONE; 162 GLenum gl_internal_format = GL_NONE;
173 GLenum gl_format = GL_NONE; 163 GLenum gl_format = GL_NONE;
174 GLenum gl_type = GL_NONE; 164 GLenum gl_type = GL_NONE;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index cbccfdeb4..f7ad8f370 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -4,23 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <glad/glad.h> 7#include <glad/glad.h>
9#include "common/common_types.h"
10#include "common/logging/log.h"
11#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
12 9
13namespace OpenGL { 10namespace OpenGL::MaxwellToGL {
14
15using GLvec2 = std::array<GLfloat, 2>;
16using GLvec3 = std::array<GLfloat, 3>;
17using GLvec4 = std::array<GLfloat, 4>;
18
19using GLuvec2 = std::array<GLuint, 2>;
20using GLuvec3 = std::array<GLuint, 3>;
21using GLuvec4 = std::array<GLuint, 4>;
22
23namespace MaxwellToGL {
24 11
25using Maxwell = Tegra::Engines::Maxwell3D::Regs; 12using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26 13
@@ -317,26 +304,6 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
317 return GL_ZERO; 304 return GL_ZERO;
318} 305}
319 306
320inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
321 switch (source) {
322 case Tegra::Texture::SwizzleSource::Zero:
323 return GL_ZERO;
324 case Tegra::Texture::SwizzleSource::R:
325 return GL_RED;
326 case Tegra::Texture::SwizzleSource::G:
327 return GL_GREEN;
328 case Tegra::Texture::SwizzleSource::B:
329 return GL_BLUE;
330 case Tegra::Texture::SwizzleSource::A:
331 return GL_ALPHA;
332 case Tegra::Texture::SwizzleSource::OneInt:
333 case Tegra::Texture::SwizzleSource::OneFloat:
334 return GL_ONE;
335 }
336 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source);
337 return GL_ZERO;
338}
339
340inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { 307inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
341 switch (comparison) { 308 switch (comparison) {
342 case Maxwell::ComparisonOp::Never: 309 case Maxwell::ComparisonOp::Never:
@@ -493,5 +460,4 @@ inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
493 return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); 460 return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
494} 461}
495 462
496} // namespace MaxwellToGL 463} // namespace OpenGL::MaxwellToGL
497} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 21159e498..9d2acd4d9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -29,9 +29,7 @@
29#include "video_core/textures/decoders.h" 29#include "video_core/textures/decoders.h"
30 30
31 31 namespace OpenGL {
32
33 32 namespace {
34
35 33 constexpr GLint PositionLocation = 0;
36 34 constexpr GLint TexCoordLocation = 1;
37 35 constexpr GLint ModelViewMatrixLocation = 0;
@@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
124 122 break;
125 123 }
126 124 }
127
128 125 } // Anonymous namespace
129 126
130RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, 127RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
@@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
132 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 129 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
133 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 130 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
134 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, 131 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
135 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} 132 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
133 program_manager{device},
134 rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
135 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
136 glEnable(GL_DEBUG_OUTPUT);
137 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
138 glDebugMessageCallback(DebugHandler, nullptr);
139 }
140 AddTelemetryFields();
141 InitOpenGLObjects();
142}
136 143
137RendererOpenGL::~RendererOpenGL() = default; 144RendererOpenGL::~RendererOpenGL() = default;
138 145
@@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
148 155
149 ++m_current_frame; 156 ++m_current_frame;
150 157
151 rasterizer->TickFrame(); 158 rasterizer.TickFrame();
152 159
153 context->SwapBuffers(); 160 context->SwapBuffers();
154 render_window.OnFrameDisplayed(); 161 render_window.OnFrameDisplayed();
@@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
179 framebuffer_crop_rect = framebuffer.crop_rect; 186 framebuffer_crop_rect = framebuffer.crop_rect;
180 187
181 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; 188 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
182 if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { 189 if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
183 return; 190 return;
184 } 191 }
185 192
@@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() {
267 // Enable unified vertex attributes and query vertex buffer address when the driver supports it 274 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
268 if (device.HasVertexBufferUnifiedMemory()) { 275 if (device.HasVertexBufferUnifiedMemory()) {
269 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); 276 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
277 glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
270 278
271 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); 279 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
272 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, 280 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
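Alongside the existing vertex-attribute path, the added glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV) opts index fetches into NV bindless memory as well. A sketch of how an index buffer would then be sourced by GPU address, assuming NV_vertex_buffer_unified_memory and a gpu_addr/size pair queried the same way as the vertex buffer's above:

    // Hypothetical usage: bind the element array by address, not by name.
    glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
    glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, gpu_addr, size);

Resident-buffer addresses bypass the driver's name-to-address lookup on every draw, which is the point of the extension.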
@@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() {
289 telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); 297 telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
290} 298}
291 299
292void RendererOpenGL::CreateRasterizer() {
293 if (rasterizer) {
294 return;
295 }
296 rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
297 screen_info, program_manager, state_tracker);
298}
299
300void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 300void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
301 const Tegra::FramebufferConfig& framebuffer) { 301 const Tegra::FramebufferConfig& framebuffer) {
302 texture.width = framebuffer.width; 302 texture.width = framebuffer.width;
@@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
407 407
408 program_manager.BindHostPipeline(pipeline.handle); 408 program_manager.BindHostPipeline(pipeline.handle);
409 409
410 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
410 glEnable(GL_CULL_FACE); 411 glEnable(GL_CULL_FACE);
411 if (screen_info.display_srgb) { 412 if (screen_info.display_srgb) {
412 glEnable(GL_FRAMEBUFFER_SRGB); 413 glEnable(GL_FRAMEBUFFER_SRGB);
@@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
425 glCullFace(GL_BACK); 426 glCullFace(GL_BACK);
426 glFrontFace(GL_CW); 427 glFrontFace(GL_CW);
427 glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); 428 glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
428 glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
429 glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), 429 glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
430 static_cast<GLfloat>(layout.height)); 430 static_cast<GLfloat>(layout.height));
431 glDepthRangeIndexed(0, 0.0, 0.0); 431 glDepthRangeIndexed(0, 0.0, 0.0);
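In the two hunks above, DrawScreen stops calling glClipControl directly and routes it through state_tracker, so the tracker's cached value cannot go stale and the guest pipeline re-applies clip control only when it actually changed. A hedged sketch of what such a wrapper might look like; the real StateTracker implementation and its member names may differ:

    void StateTracker::ClipControl(GLenum origin, GLenum depth) {
        if (origin == current_origin && depth == current_depth) {
            return; // redundant driver call elided
        }
        current_origin = origin; // hypothetical cached members
        current_depth = depth;
        glClipControl(origin, depth);
    }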
@@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() {
497 renderer_settings.screenshot_requested = false; 497 renderer_settings.screenshot_requested = false;
498} 498}
499 499
500bool RendererOpenGL::Init() {
501 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
502 glEnable(GL_DEBUG_OUTPUT);
503 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
504 glDebugMessageCallback(DebugHandler, nullptr);
505 }
506
507 AddTelemetryFields();
508
509 if (!GLAD_GL_VERSION_4_6) {
510 return false;
511 }
512
513 InitOpenGLObjects();
514 CreateRasterizer();
515
516 return true;
517}
518
519void RendererOpenGL::ShutDown() {}
520
521} // namespace OpenGL 500} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 44e109794..cc19a110f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,6 +10,7 @@
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
12#include "video_core/renderer_opengl/gl_device.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_shader_manager.h" 15#include "video_core/renderer_opengl/gl_shader_manager.h"
15#include "video_core/renderer_opengl/gl_state_tracker.h" 16#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -63,18 +64,18 @@ public:
63 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 64 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
64 ~RendererOpenGL() override; 65 ~RendererOpenGL() override;
65 66
66 bool Init() override;
67 void ShutDown() override;
68 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 67 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
69 68
69 VideoCore::RasterizerInterface* ReadRasterizer() override {
70 return &rasterizer;
71 }
72
70private: 73private:
71 /// Initializes the OpenGL state and creates persistent objects. 74 /// Initializes the OpenGL state and creates persistent objects.
72 void InitOpenGLObjects(); 75 void InitOpenGLObjects();
73 76
74 void AddTelemetryFields(); 77 void AddTelemetryFields();
75 78
76 void CreateRasterizer();
77
78 void ConfigureFramebufferTexture(TextureInfo& texture, 79 void ConfigureFramebufferTexture(TextureInfo& texture,
79 const Tegra::FramebufferConfig& framebuffer); 80 const Tegra::FramebufferConfig& framebuffer);
80 81
@@ -98,8 +99,10 @@ private:
98 Core::Memory::Memory& cpu_memory; 99 Core::Memory::Memory& cpu_memory;
99 Tegra::GPU& gpu; 100 Tegra::GPU& gpu;
100 101
101 const Device device; 102 Device device;
102 StateTracker state_tracker{gpu}; 103 StateTracker state_tracker;
104 ProgramManager program_manager;
105 RasterizerOpenGL rasterizer;
103 106
104 // OpenGL object IDs 107 // OpenGL object IDs
105 OGLSampler present_sampler; 108 OGLSampler present_sampler;
@@ -115,9 +118,6 @@ private:
115 /// Display information for Switch screen 118 /// Display information for Switch screen
116 ScreenInfo screen_info; 119 ScreenInfo screen_info;
117 120
118 /// Global dummy shader pipeline
119 ProgramManager program_manager;
120
121 /// OpenGL framebuffer data 121 /// OpenGL framebuffer data
122 std::vector<u8> gl_framebuffer_data; 122 std::vector<u8> gl_framebuffer_data;
123 123
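Since the rasterizer is now a plain member, the renderer satisfies the base-class contract with a trivial accessor instead of handing out a unique_ptr. The pattern, condensed:

    class RendererOpenGL final : public VideoCore::RendererBase {
    public:
        VideoCore::RasterizerInterface* ReadRasterizer() override {
            return &rasterizer; // address is stable for the renderer's lifetime
        }
    private:
        RasterizerOpenGL rasterizer; // declared after everything it depends on
    };

Note the member order in the hunk above: device, state_tracker and program_manager are declared, and therefore constructed, before rasterizer, which takes all three by reference.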
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index eb849cbf2..31ec68505 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
63 63
64UtilShaders::~UtilShaders() = default; 64UtilShaders::~UtilShaders() = default;
65 65
66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
67 std::span<const SwizzleParameters> swizzles) { 67 std::span<const SwizzleParameters> swizzles) {
68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; 69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
72 72
73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); 73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
74 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); 74 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
76 76
77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
78 for (const SwizzleParameters& swizzle : swizzles) { 78 for (const SwizzleParameters& swizzle : swizzles) {
79 const Extent3D num_tiles = swizzle.num_tiles; 79 const Extent3D num_tiles = swizzle.num_tiles;
80 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 80 const size_t input_offset = swizzle.buffer_offset + map.offset;
81 81
82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -91,16 +91,16 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
91 glUniform1ui(5, params.x_shift); 91 glUniform1ui(5, params.x_shift);
92 glUniform1ui(6, params.block_height); 92 glUniform1ui(6, params.block_height);
93 glUniform1ui(7, params.block_height_mask); 93 glUniform1ui(7, params.block_height_mask);
94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), 94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
95 input_offset, image.guest_size_bytes - swizzle.buffer_offset); 95 image.guest_size_bytes - swizzle.buffer_offset);
96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, 96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
97 GL_WRITE_ONLY, store_format); 97 GL_WRITE_ONLY, store_format);
98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); 98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
99 } 99 }
100 program_manager.RestoreGuestCompute(); 100 program_manager.RestoreGuestCompute();
101} 101}
102 102
103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
104 std::span<const SwizzleParameters> swizzles) { 104 std::span<const SwizzleParameters> swizzles) {
105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; 105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
106 106
@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
108 static constexpr GLuint BINDING_INPUT_BUFFER = 1; 108 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
110 110
111 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); 111 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); 112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
114 114
115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
116 for (const SwizzleParameters& swizzle : swizzles) { 116 for (const SwizzleParameters& swizzle : swizzles) {
117 const Extent3D num_tiles = swizzle.num_tiles; 117 const Extent3D num_tiles = swizzle.num_tiles;
118 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 118 const size_t input_offset = swizzle.buffer_offset + map.offset;
119 119
120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -132,16 +132,16 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
132 glUniform1ui(7, params.block_height_mask); 132 glUniform1ui(7, params.block_height_mask);
133 glUniform1ui(8, params.block_depth); 133 glUniform1ui(8, params.block_depth);
134 glUniform1ui(9, params.block_depth_mask); 134 glUniform1ui(9, params.block_depth_mask);
135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), 135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
136 input_offset, image.guest_size_bytes - swizzle.buffer_offset); 136 image.guest_size_bytes - swizzle.buffer_offset);
137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, 137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
138 GL_WRITE_ONLY, store_format); 138 GL_WRITE_ONLY, store_format);
139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); 139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
140 } 140 }
141 program_manager.RestoreGuestCompute(); 141 program_manager.RestoreGuestCompute();
142} 142}
143 143
144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
145 std::span<const SwizzleParameters> swizzles) { 145 std::span<const SwizzleParameters> swizzles) {
146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
147 static constexpr GLuint BINDING_INPUT_BUFFER = 0; 147 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,21 +159,22 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
159 "Non-power of two images are not implemented"); 159 "Non-power of two images are not implemented");
160 160
161 program_manager.BindHostCompute(pitch_unswizzle_program.handle); 161 program_manager.BindHostCompute(pitch_unswizzle_program.handle);
162 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); 162 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
163 glUniform2ui(LOC_ORIGIN, 0, 0); 163 glUniform2ui(LOC_ORIGIN, 0, 0);
164 glUniform2i(LOC_DESTINATION, 0, 0); 164 glUniform2i(LOC_DESTINATION, 0, 0);
165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); 165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
166 glUniform1ui(LOC_PITCH, pitch); 166 glUniform1ui(LOC_PITCH, pitch);
167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); 167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY,
168 format);
168 for (const SwizzleParameters& swizzle : swizzles) { 169 for (const SwizzleParameters& swizzle : swizzles) {
169 const Extent3D num_tiles = swizzle.num_tiles; 170 const Extent3D num_tiles = swizzle.num_tiles;
170 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 171 const size_t input_offset = swizzle.buffer_offset + map.offset;
171 172
172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 173 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 174 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
174 175
175 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), 176 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
176 input_offset, image.guest_size_bytes - swizzle.buffer_offset); 177 image.guest_size_bytes - swizzle.buffer_offset);
177 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); 178 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
178 } 179 }
179 program_manager.RestoreGuestCompute(); 180 program_manager.RestoreGuestCompute();
@@ -195,9 +196,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
195 196
196 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); 197 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
197 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); 198 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
198 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, 199 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
199 GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); 200 copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
200 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), 201 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
201 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); 202 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
202 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); 203 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
203 } 204 }
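All of the upload helpers drop their separate buffer_offset parameter: the map itself now knows where it starts, and the handle is a plain field rather than a Handle() accessor. A plausible shape for the new ImageBufferMap, inferred from its use here; the actual definition lives in the texture cache and may carry more state:

    struct ImageBufferMap {
        GLuint buffer = 0;         // staging buffer, bound via glBindBufferRange
        size_t offset = 0;         // where this map begins inside that buffer
        std::span<u8> mapped_span; // host-visible bytes for CPU writes
    };

Folding the offset into the map removes an argument every caller had to thread through correctly, and makes it impossible to pair a map with the wrong offset.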
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 359997255..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -15,21 +15,22 @@
15namespace OpenGL { 15namespace OpenGL {
16 16
17class Image; 17class Image;
18class ImageBufferMap;
19class ProgramManager; 18class ProgramManager;
20 19
20struct ImageBufferMap;
21
21class UtilShaders { 22class UtilShaders {
22public: 23public:
23 explicit UtilShaders(ProgramManager& program_manager); 24 explicit UtilShaders(ProgramManager& program_manager);
24 ~UtilShaders(); 25 ~UtilShaders();
25 26
26 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 27 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
27 std::span<const VideoCommon::SwizzleParameters> swizzles); 28 std::span<const VideoCommon::SwizzleParameters> swizzles);
28 29
29 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 30 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
30 std::span<const VideoCommon::SwizzleParameters> swizzles); 31 std::span<const VideoCommon::SwizzleParameters> swizzles);
31 32
32 void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 33 void PitchUpload(Image& image, const ImageBufferMap& map,
33 std::span<const VideoCommon::SwizzleParameters> swizzles); 34 std::span<const VideoCommon::SwizzleParameters> swizzles);
34 35
35 void CopyBC4(Image& dst_image, Image& src_image, 36 void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 85121d9fd..19aaf034f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
531 return {}; 531 return {};
532} 532}
533 533
534VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { 534VkIndexType IndexFormat(Maxwell::IndexFormat index_format) {
535 switch (index_format) { 535 switch (index_format) {
536 case Maxwell::IndexFormat::UnsignedByte: 536 case Maxwell::IndexFormat::UnsignedByte:
537 if (!device.IsExtIndexTypeUint8Supported()) {
538 UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
539 return VK_INDEX_TYPE_UINT16;
540 }
541 return VK_INDEX_TYPE_UINT8_EXT; 537 return VK_INDEX_TYPE_UINT8_EXT;
542 case Maxwell::IndexFormat::UnsignedShort: 538 case Maxwell::IndexFormat::UnsignedShort:
543 return VK_INDEX_TYPE_UINT16; 539 return VK_INDEX_TYPE_UINT16;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7c34b47dc..e3e06ba38 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
53 53
54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); 54VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
55 55
56VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); 56VkIndexType IndexFormat(Maxwell::IndexFormat index_format);
57 57
58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); 58VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
59 59
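IndexFormat() becomes a pure format translation; the capability check moves to the one caller that can do something useful about it. Instead of logging UNIMPLEMENTED and mis-rendering with uint16, the buffer cache runtime (later in this diff) rewrites the index stream on the GPU:

    VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
    if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
        vk_index_type = VK_INDEX_TYPE_UINT16;
        std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
    }

This keeps the translation table total, every enum value maps to something, while the fallback lives next to the compute pass that implements it.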
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 61796e33a..1cc720ddd 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
80 return separated_extensions; 80 return separated_extensions;
81} 81}
82 82
83Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
84 VkSurfaceKHR surface) {
85 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
86 const s32 device_index = Settings::values.vulkan_device.GetValue();
87 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
88 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
89 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
90 }
91 const vk::PhysicalDevice physical_device(devices[device_index], dld);
92 return Device(*instance, physical_device, surface, dld);
93}
83} // Anonymous namespace 94} // Anonymous namespace
84 95
85RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 96RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
86 Core::Frontend::EmuWindow& emu_window, 97 Core::Frontend::EmuWindow& emu_window,
87 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 98 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
88 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 99 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
89 : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, 100 : RendererBase(emu_window, std::move(context_)),
90 cpu_memory{cpu_memory_}, gpu{gpu_} {} 101 telemetry_session(telemetry_session_),
102 cpu_memory(cpu_memory_),
103 gpu(gpu_),
104 library(OpenLibrary()),
105 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
106 true, Settings::values.renderer_debug)),
107 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
108 surface(CreateSurface(instance, render_window)),
109 device(CreateDevice(instance, dld, *surface)),
110 memory_allocator(device, false),
111 state_tracker(gpu),
112 scheduler(device, state_tracker),
113 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
114 render_window.GetFramebufferLayout().height, false),
115 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
116 screen_info),
117 rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
118 memory_allocator, state_tracker, scheduler) {
119 Report();
120} catch (const vk::Exception& exception) {
121 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
122 throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
123}
91 124
92RendererVulkan::~RendererVulkan() { 125RendererVulkan::~RendererVulkan() {
93 ShutDown(); 126 void(device.GetLogical().WaitIdle());
94} 127}
95 128
96void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 129void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
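Because the Vulkan renderer's members are now built in the initializer list, failures surface as exceptions before the constructor body ever runs; a try block inside the body could not catch them, so the constructor uses a function-try-block. A self-contained sketch of the idiom with illustrative names:

    #include <stdexcept>
    #include <string>

    struct Device {
        Device() { throw std::runtime_error{"no suitable GPU"}; }
    };

    struct Renderer {
        Renderer() try : device{} {
            // skipped when a member initializer throws
        } catch (const std::exception& e) {
            // translate for the caller (reaching the end here would rethrow anyway)
            throw std::runtime_error{std::string{"Vulkan initialization error "} + e.what()};
        }
        Device device;
    };

A constructor's function-try-block cannot swallow the exception: falling off the end of the handler rethrows, since the object never finished constructing. The destructor likewise shrinks to a device WaitIdle, because value members tear themselves down in reverse declaration order.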
@@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
101 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 134 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
102 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 135 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
103 const bool use_accelerated = 136 const bool use_accelerated =
104 rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 137 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
105 const bool is_srgb = use_accelerated && screen_info.is_srgb; 138 const bool is_srgb = use_accelerated && screen_info.is_srgb;
106 if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { 139 if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
107 swapchain->Create(layout.width, layout.height, is_srgb); 140 swapchain.Create(layout.width, layout.height, is_srgb);
108 blit_screen->Recreate(); 141 blit_screen.Recreate();
109 } 142 }
110 143
111 scheduler->WaitWorker(); 144 scheduler.WaitWorker();
112 145
113 swapchain->AcquireNextImage(); 146 swapchain.AcquireNextImage();
114 const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); 147 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
115 148
116 scheduler->Flush(render_semaphore); 149 scheduler.Flush(render_semaphore);
117 150
118 if (swapchain->Present(render_semaphore)) { 151 if (swapchain.Present(render_semaphore)) {
119 blit_screen->Recreate(); 152 blit_screen.Recreate();
120 } 153 }
121 154 rasterizer.TickFrame();
122 rasterizer->TickFrame();
123 } 155 }
124 156
125 render_window.OnFrameDisplayed(); 157 render_window.OnFrameDisplayed();
126} 158}
127 159
128bool RendererVulkan::Init() try {
129 library = OpenLibrary();
130 instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
131 true, Settings::values.renderer_debug);
132 if (Settings::values.renderer_debug) {
133 debug_callback = CreateDebugCallback(instance);
134 }
135 surface = CreateSurface(instance, render_window);
136
137 InitializeDevice();
138 Report();
139
140 memory_allocator = std::make_unique<MemoryAllocator>(*device);
141
142 state_tracker = std::make_unique<StateTracker>(gpu);
143
144 scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
145
146 const auto& framebuffer = render_window.GetFramebufferLayout();
147 swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
148 swapchain->Create(framebuffer.width, framebuffer.height, false);
149
150 rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
151 cpu_memory, screen_info, *device,
152 *memory_allocator, *state_tracker, *scheduler);
153
154 blit_screen =
155 std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
156 *memory_allocator, *swapchain, *scheduler, screen_info);
157 return true;
158
159} catch (const vk::Exception& exception) {
160 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
161 return false;
162}
163
164void RendererVulkan::ShutDown() {
165 if (!device) {
166 return;
167 }
168 if (const auto& dev = device->GetLogical()) {
169 dev.WaitIdle();
170 }
171 rasterizer.reset();
172 blit_screen.reset();
173 scheduler.reset();
174 swapchain.reset();
175 memory_allocator.reset();
176 device.reset();
177}
178
179void RendererVulkan::InitializeDevice() {
180 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
181 const s32 device_index = Settings::values.vulkan_device.GetValue();
182 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
183 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
184 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
185 }
186 const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
187 device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
188}
189
190void RendererVulkan::Report() const { 160void RendererVulkan::Report() const {
191 const std::string vendor_name{device->GetVendorName()}; 161 const std::string vendor_name{device.GetVendorName()};
192 const std::string model_name{device->GetModelName()}; 162 const std::string model_name{device.GetModelName()};
193 const std::string driver_version = GetDriverVersion(*device); 163 const std::string driver_version = GetDriverVersion(device);
194 const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); 164 const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
195 165
196 const std::string api_version = GetReadableVersion(device->ApiVersion()); 166 const std::string api_version = GetReadableVersion(device.ApiVersion());
197 167
198 const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); 168 const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions());
199 169
200 LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); 170 LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
201 LOG_INFO(Render_Vulkan, "Device: {}", model_name); 171 LOG_INFO(Render_Vulkan, "Device: {}", model_name);
@@ -209,21 +179,4 @@ void RendererVulkan::Report() const {
209 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 179 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
210} 180}
211 181
212std::vector<std::string> RendererVulkan::EnumerateDevices() try {
213 vk::InstanceDispatch dld;
214 const Common::DynamicLibrary library = OpenLibrary();
215 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
216 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
217 std::vector<std::string> names;
218 names.reserve(physical_devices.size());
219 for (const VkPhysicalDevice device : physical_devices) {
220 names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
221 }
222 return names;
223
224} catch (const vk::Exception& exception) {
225 LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
226 return {};
227}
228
229} // namespace Vulkan 182} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index daf55b9b4..72071316c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,8 +9,14 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/dynamic_library.h" 11#include "common/dynamic_library.h"
12
13#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/renderer_vulkan/vk_blit_screen.h"
14#include "video_core/renderer_vulkan/vk_rasterizer.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_state_tracker.h"
17#include "video_core/renderer_vulkan/vk_swapchain.h"
18#include "video_core/vulkan_common/vulkan_device.h"
19#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 20#include "video_core/vulkan_common/vulkan_wrapper.h"
15 21
16namespace Core { 22namespace Core {
@@ -27,20 +33,6 @@ class GPU;
27 33
28namespace Vulkan { 34namespace Vulkan {
29 35
30class Device;
31class StateTracker;
32class MemoryAllocator;
33class VKBlitScreen;
34class VKSwapchain;
35class VKScheduler;
36
37struct VKScreenInfo {
38 VkImageView image_view{};
39 u32 width{};
40 u32 height{};
41 bool is_srgb{};
42};
43
44class RendererVulkan final : public VideoCore::RendererBase { 36class RendererVulkan final : public VideoCore::RendererBase {
45public: 37public:
 46 explicit RendererVulkan(Core::TelemetrySession& telemetry_session, 38 explicit RendererVulkan(Core::TelemetrySession& telemetry_session,
@@ -49,15 +41,13 @@ public:
49 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 41 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
50 ~RendererVulkan() override; 42 ~RendererVulkan() override;
51 43
52 bool Init() override;
53 void ShutDown() override;
54 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 44 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
55 45
56 static std::vector<std::string> EnumerateDevices(); 46 VideoCore::RasterizerInterface* ReadRasterizer() override {
47 return &rasterizer;
48 }
57 49
58private: 50private:
59 void InitializeDevice();
60
61 void Report() const; 51 void Report() const;
62 52
63 Core::TelemetrySession& telemetry_session; 53 Core::TelemetrySession& telemetry_session;
@@ -68,18 +58,18 @@ private:
68 vk::InstanceDispatch dld; 58 vk::InstanceDispatch dld;
69 59
70 vk::Instance instance; 60 vk::Instance instance;
71 61 vk::DebugUtilsMessenger debug_callback;
72 vk::SurfaceKHR surface; 62 vk::SurfaceKHR surface;
73 63
74 VKScreenInfo screen_info; 64 VKScreenInfo screen_info;
75 65
76 vk::DebugUtilsMessenger debug_callback; 66 Device device;
77 std::unique_ptr<Device> device; 67 MemoryAllocator memory_allocator;
78 std::unique_ptr<MemoryAllocator> memory_allocator; 68 StateTracker state_tracker;
79 std::unique_ptr<StateTracker> state_tracker; 69 VKScheduler scheduler;
80 std::unique_ptr<VKScheduler> scheduler; 70 VKSwapchain swapchain;
81 std::unique_ptr<VKSwapchain> swapchain; 71 VKBlitScreen blit_screen;
82 std::unique_ptr<VKBlitScreen> blit_screen; 72 RasterizerVulkan rasterizer;
83}; 73};
84 74
85} // namespace Vulkan 75} // namespace Vulkan
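The unique_ptr members become values, which works because non-static data members are initialized in declaration order: the header itself now encodes the dependency graph. device precedes memory_allocator and scheduler, scheduler precedes swapchain, and rasterizer, which references nearly everything, comes last; destruction runs in exactly the reverse order. A compact illustration:

    struct Renderer {
        Device device;                            // built first, destroyed last
        Scheduler scheduler{device};              // safe: device already exists
        Rasterizer rasterizer{device, scheduler}; // built last, destroyed first
    };

Reordering these declarations would silently change construction order, so the sequence in this header is load-bearing.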
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 3e3b895e0..a1a32aabe 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -18,7 +18,6 @@
18#include "video_core/gpu.h" 18#include "video_core/gpu.h"
19#include "video_core/host_shaders/vulkan_present_frag_spv.h" 19#include "video_core/host_shaders/vulkan_present_frag_spv.h"
20#include "video_core/host_shaders/vulkan_present_vert_spv.h" 20#include "video_core/host_shaders/vulkan_present_vert_spv.h"
21#include "video_core/rasterizer_interface.h"
22#include "video_core/renderer_vulkan/renderer_vulkan.h" 21#include "video_core/renderer_vulkan/renderer_vulkan.h"
23#include "video_core/renderer_vulkan/vk_blit_screen.h" 22#include "video_core/renderer_vulkan/vk_blit_screen.h"
24#include "video_core/renderer_vulkan/vk_master_semaphore.h" 23#include "video_core/renderer_vulkan/vk_master_semaphore.h"
@@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData {
113}; 112};
114 113
115VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, 114VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
116 Core::Frontend::EmuWindow& render_window_, 115 Core::Frontend::EmuWindow& render_window_, const Device& device_,
117 VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
118 MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, 116 MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_,
119 VKScheduler& scheduler_, const VKScreenInfo& screen_info_) 117 VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
120 : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, 118 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
121 device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, 119 memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
122 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { 120 image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
123 resource_ticks.resize(image_count); 121 resource_ticks.resize(image_count);
124 122
125 CreateStaticResources(); 123 CreateStaticResources();
@@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
150 SetUniformData(data, framebuffer); 148 SetUniformData(data, framebuffer);
151 SetVertexData(data, framebuffer); 149 SetVertexData(data, framebuffer);
152 150
153 const std::span<u8> map = buffer_commit.Map(); 151 const std::span<u8> mapped_span = buffer_commit.Map();
154 std::memcpy(map.data(), &data, sizeof(data)); 152 std::memcpy(mapped_span.data(), &data, sizeof(data));
155 153
156 if (!use_accelerated) { 154 if (!use_accelerated) {
157 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 155 const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
@@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
159 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 157 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
160 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 158 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
161 const size_t size_bytes = GetSizeInBytes(framebuffer); 159 const size_t size_bytes = GetSizeInBytes(framebuffer);
162 rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
163 160
164 // TODO(Rodrigo): Read this from HLE 161 // TODO(Rodrigo): Read this from HLE
165 constexpr u32 block_height_log2 = 4; 162 constexpr u32 block_height_log2 = 4;
166 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); 163 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
167 Tegra::Texture::UnswizzleTexture( 164 Tegra::Texture::UnswizzleTexture(
168 map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, 165 mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
169 framebuffer.width, framebuffer.height, 1, block_height_log2, 0); 166 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
170 167
171 const VkBufferImageCopy copy{ 168 const VkBufferImageCopy copy{
172 .bufferOffset = image_offset, 169 .bufferOffset = image_offset,
@@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
266 cmdbuf.Draw(4, 1, 0, 0); 263 cmdbuf.Draw(4, 1, 0, 0);
267 cmdbuf.EndRenderPass(); 264 cmdbuf.EndRenderPass();
268 }); 265 });
269
270 return *semaphores[image_index]; 266 return *semaphores[image_index];
271} 267}
272 268
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index b52576957..5e3177685 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -38,12 +38,18 @@ class RasterizerVulkan;
38class VKScheduler; 38class VKScheduler;
39class VKSwapchain; 39class VKSwapchain;
40 40
41class VKBlitScreen final { 41struct VKScreenInfo {
42 VkImageView image_view{};
43 u32 width{};
44 u32 height{};
45 bool is_srgb{};
46};
47
48class VKBlitScreen {
42public: 49public:
43 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, 50 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
44 Core::Frontend::EmuWindow& render_window, 51 Core::Frontend::EmuWindow& render_window, const Device& device,
45 VideoCore::RasterizerInterface& rasterizer, const Device& device, 52 MemoryAllocator& memory_manager, VKSwapchain& swapchain,
46 MemoryAllocator& memory_allocator, VKSwapchain& swapchain,
47 VKScheduler& scheduler, const VKScreenInfo& screen_info); 53 VKScheduler& scheduler, const VKScreenInfo& screen_info);
48 ~VKBlitScreen(); 54 ~VKBlitScreen();
49 55
@@ -84,7 +90,6 @@ private:
84 90
85 Core::Memory::Memory& cpu_memory; 91 Core::Memory::Memory& cpu_memory;
86 Core::Frontend::EmuWindow& render_window; 92 Core::Frontend::EmuWindow& render_window;
87 VideoCore::RasterizerInterface& rasterizer;
88 const Device& device; 93 const Device& device;
89 MemoryAllocator& memory_allocator; 94 MemoryAllocator& memory_allocator;
90 VKSwapchain& swapchain; 95 VKSwapchain& swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8ad40a0f..848eedd66 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -3,188 +3,308 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstring> 7#include <cstring>
7#include <memory> 8#include <span>
9#include <vector>
8 10
9#include "core/core.h"
10#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h" 13#include "video_core/renderer_vulkan/vk_buffer_cache.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
13#include "video_core/renderer_vulkan/vk_stream_buffer.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
14#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
16 20
17namespace Vulkan { 21namespace Vulkan {
18
19namespace { 22namespace {
23VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) {
24 return VkBufferCopy{
25 .srcOffset = copy.src_offset,
26 .dstOffset = copy.dst_offset,
27 .size = copy.size,
28 };
29}
20 30
21constexpr VkBufferUsageFlags BUFFER_USAGE = 31VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
22 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | 32 if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
23 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; 33 return VK_INDEX_TYPE_UINT8_EXT;
24 34 }
25constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = 35 if (num_elements <= 0xffff) {
26 VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 36 return VK_INDEX_TYPE_UINT16;
27 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 37 }
28 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; 38 return VK_INDEX_TYPE_UINT32;
29 39}
30constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
33 40
34constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = 41size_t BytesPerIndex(VkIndexType index_type) {
35 VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; 42 switch (index_type) {
43 case VK_INDEX_TYPE_UINT8_EXT:
44 return 1;
45 case VK_INDEX_TYPE_UINT16:
46 return 2;
47 case VK_INDEX_TYPE_UINT32:
48 return 4;
49 default:
50 UNREACHABLE_MSG("Invalid index type={}", index_type);
51 return 1;
52 }
53}
36 54
55template <typename T>
56std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
57 std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
58 std::ranges::transform(indices, indices.begin(),
59 [quad, first](u32 index) { return first + index + quad * 4; });
60 return indices;
61}
37} // Anonymous namespace 62} // Anonymous namespace
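MakeQuadIndices expands one quad into the two triangles that share its first and third corners: the base pattern {0,1,2, 0,2,3} shifted by first + quad * 4. A standalone check of the arithmetic:

    #include <array>
    #include <cassert>
    #include <cstdint>

    // Same expansion as the helper above, for 16-bit indices.
    std::array<std::uint16_t, 6> QuadIndices(std::uint32_t quad, std::uint32_t first) {
        std::array<std::uint16_t, 6> indices{0, 1, 2, 0, 2, 3};
        for (auto& index : indices) {
            index = static_cast<std::uint16_t>(first + index + quad * 4);
        }
        return indices;
    }

    int main() {
        // Quad 1, first 0: vertices 4..7 become triangles (4,5,6) and (4,6,7).
        assert((QuadIndices(1, 0) == std::array<std::uint16_t, 6>{4, 5, 6, 4, 6, 7}));
    }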
38 63
39Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, 64Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
40 StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) 65 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
41 : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ 66
42 staging_pool_} { 67Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
43 buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 68 VAddr cpu_addr_, u64 size_bytes_)
69 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
70 buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
44 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 71 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
45 .pNext = nullptr, 72 .pNext = nullptr,
46 .flags = 0, 73 .flags = 0,
47 .size = static_cast<VkDeviceSize>(size_), 74 .size = SizeBytes(),
48 .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 75 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
76 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
77 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
78 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
79 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
49 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 80 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
50 .queueFamilyIndexCount = 0, 81 .queueFamilyIndexCount = 0,
51 .pQueueFamilyIndices = nullptr, 82 .pQueueFamilyIndices = nullptr,
52 }); 83 });
53 commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); 84 if (runtime.device.HasDebuggingToolAttached()) {
85 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
86 }
87 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
54} 88}
55 89
56Buffer::~Buffer() = default; 90BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
91 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 VKDescriptorPool& descriptor_pool)
94 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
95 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
96 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
97 quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
57 98
58void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { 99StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
59 const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); 100 return staging_pool.Request(size, MemoryUsage::Upload);
60 std::memcpy(staging.mapped_span.data(), data, data_size); 101}
61 102
62 scheduler.RequestOutsideRenderPassOperationContext(); 103StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
104 return staging_pool.Request(size, MemoryUsage::Download);
105}
63 106
64 const VkBuffer handle = Handle(); 107void BufferCacheRuntime::Finish() {
65 scheduler.Record([staging = staging.buffer, handle, offset, data_size, 108 scheduler.Finish();
66 &device = device](vk::CommandBuffer cmdbuf) { 109}
67 const VkBufferMemoryBarrier read_barrier{ 110
68 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 111void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
69 .pNext = nullptr, 112 std::span<const VideoCommon::BufferCopy> copies) {
70 .srcAccessMask = 113 static constexpr VkMemoryBarrier READ_BARRIER{
71 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | 114 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
72 VK_ACCESS_HOST_WRITE_BIT | 115 .pNext = nullptr,
73 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), 116 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
74 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 117 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
75 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 118 };
76 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 119 static constexpr VkMemoryBarrier WRITE_BARRIER{
77 .buffer = handle, 120 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
78 .offset = offset, 121 .pNext = nullptr,
79 .size = data_size, 122 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
80 }; 123 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
81 const VkBufferMemoryBarrier write_barrier{ 124 };
82 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 125 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
83 .pNext = nullptr, 126 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
84 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 127 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
85 .dstAccessMask = UPLOAD_ACCESS_BARRIERS, 128 scheduler.RequestOutsideRenderPassOperationContext();
86 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 129 scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
87 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
88 .buffer = handle,
89 .offset = offset,
90 .size = data_size,
91 };
92 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 130 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
93 0, read_barrier); 131 0, READ_BARRIER);
94 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); 132 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
95 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, 133 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
96 write_barrier); 134 0, WRITE_BARRIER);
97 }); 135 });
98} 136}
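CopyBuffer guards the batch with two global VkMemoryBarriers instead of one VkBufferMemoryBarrier per range: all prior writes are made visible to the transfer stage, and the transfer's writes to everything after it. That is coarser than strictly necessary, but it stays a fixed-size barrier no matter how many copies are batched, and the small_vector keeps the common case (three or fewer copies, per the comment) off the heap. The raw-Vulkan equivalent of the wrapper calls, as a sketch assuming an active command buffer cmd:

    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         0, 1, &READ_BARRIER, 0, nullptr, 0, nullptr);
    vkCmdCopyBuffer(cmd, src_buffer, dst_buffer,
                    static_cast<uint32_t>(vk_copies.size()), vk_copies.data());
    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         0, 1, &WRITE_BARRIER, 0, nullptr, 0, nullptr);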
99 137
100void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { 138void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
101 auto staging = staging_pool.Request(data_size, MemoryUsage::Download); 139 u32 base_vertex, u32 num_indices, VkBuffer buffer,
102 scheduler.RequestOutsideRenderPassOperationContext(); 140 u32 offset, [[maybe_unused]] u32 size) {
141 VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
142 VkDeviceSize vk_offset = offset;
143 VkBuffer vk_buffer = buffer;
144 if (topology == PrimitiveTopology::Quads) {
145 vk_index_type = VK_INDEX_TYPE_UINT32;
146 std::tie(vk_buffer, vk_offset) =
147 quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
148 } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
149 vk_index_type = VK_INDEX_TYPE_UINT16;
150 std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
151 }
152 if (vk_buffer == VK_NULL_HANDLE) {
153 // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
154 ReserveNullIndexBuffer();
155 vk_buffer = *null_index_buffer;
156 }
157 scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
158 cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
159 });
160}
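Two fallbacks converge in BindIndexBuffer: quad topologies are rewritten through quad_index_pass, and uint8 indices are widened through uint8_pass when the extension is missing. The third case is subtler: vkCmdBindIndexBuffer rejects VK_NULL_HANDLE, so a guest draw that arrives without an index buffer binds a lazily created 4-byte, zero-filled device buffer instead (ReserveNullIndexBuffer, later in this hunk); any stray index fetch then safely reads vertex 0.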
103 161
104 const VkBuffer handle = Handle(); 162void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
105 scheduler.Record( 163 ReserveQuadArrayLUT(first + count, true);
106 [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
107 const VkBufferMemoryBarrier barrier{
108 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
109 .pNext = nullptr,
110 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
111 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
112 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
113 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
114 .buffer = handle,
115 .offset = offset,
116 .size = data_size,
117 };
118
119 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
120 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
121 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
122 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
123 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size});
124 });
125 scheduler.Finish();
126 164
127 std::memcpy(data, staging.mapped_span.data(), data_size); 165 // The LUT has the indices 0, 1, 2, and 3 copied as an array
166 // To apply these 'first' offsets we can apply an offset based on the modulus.
167 const VkIndexType index_type = quad_array_lut_index_type;
168 const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
169 const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
170 scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
171 cmdbuf.BindIndexBuffer(buffer, offset, index_type);
172 });
128} 173}
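The LUT stores four complete copies of the expanded index stream, one per value of first % 4, so any first vertex can be honored with a byte offset alone, without regenerating indices. Evaluating the offset expression above, restated as a standalone helper:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t QuadLutOffset(std::uint32_t first, std::uint32_t num_indices,
                                        std::size_t bytes_per_index) {
        const std::size_t sub_first_offset =
            static_cast<std::size_t>(first % 4) * (num_indices / 4);
        return (sub_first_offset + first / 4) * 6 * bytes_per_index;
    }
    // first = 5 with a 64-index LUT and 16-bit indices:
    // (1 * 16 + 1) * 6 * 2 = 204 bytes into the LUT.
    static_assert(QuadLutOffset(5, 64, 2) == 204);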
129 174
130void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, 175void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
131 std::size_t copy_size) { 176 u32 stride) {
132 scheduler.RequestOutsideRenderPassOperationContext(); 177 if (device.IsExtExtendedDynamicStateSupported()) {
178 scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) {
179 const VkDeviceSize vk_offset = offset;
180 const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE;
181 const VkDeviceSize vk_stride = stride;
182 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
183 });
184 } else {
185 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
186 cmdbuf.BindVertexBuffer(index, buffer, offset);
187 });
188 }
189}
133 190
134 const VkBuffer dst_buffer = Handle(); 191void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset,
135 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, 192 u32 size) {
136 copy_size](vk::CommandBuffer cmdbuf) { 193 if (!device.IsExtTransformFeedbackSupported()) {
137 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); 194 // Already logged in the rasterizer
138 195 return;
139 std::array<VkBufferMemoryBarrier, 2> barriers; 196 }
140 barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 197 scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) {
141 barriers[0].pNext = nullptr; 198 const VkDeviceSize vk_offset = offset;
142 barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 199 const VkDeviceSize vk_size = size;
143 barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 200 cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size);
144 barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
145 barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
146 barriers[0].buffer = src_buffer;
147 barriers[0].offset = src_offset;
148 barriers[0].size = copy_size;
149 barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
150 barriers[1].pNext = nullptr;
151 barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
152 barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
153 barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
154 barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
155 barriers[1].buffer = dst_buffer;
156 barriers[1].offset = dst_offset;
157 barriers[1].size = copy_size;
158 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
159 barriers, {});
160 }); 201 });
161} 202}
162 203
163VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, 204void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
164 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 205 update_descriptor_queue.AddBuffer(buffer, offset, size);
165 const Device& device_, MemoryAllocator& memory_allocator_, 206}
166 VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
167 StagingBufferPool& staging_pool_)
168 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
169 cpu_memory_, stream_buffer_},
170 device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
171 staging_pool{staging_pool_} {}
172 207
173VKBufferCache::~VKBufferCache() = default; 208void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
209 if (num_indices <= current_num_indices) {
210 return;
211 }
212 if (wait_for_idle) {
213 scheduler.Finish();
214 }
215 current_num_indices = num_indices;
216 quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
174 217
175std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 218 const u32 num_quads = num_indices / 4;
176 return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, 219 const u32 num_triangle_indices = num_quads * 6;
177 size); 220 const u32 num_first_offset_copies = 4;
221 const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
222 const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
223 quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
224 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
225 .pNext = nullptr,
226 .flags = 0,
227 .size = size_bytes,
228 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
229 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
230 .queueFamilyIndexCount = 0,
231 .pQueueFamilyIndices = nullptr,
232 });
233 if (device.HasDebuggingToolAttached()) {
234 quad_array_lut.SetObjectNameEXT("Quad LUT");
235 }
236 quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
237
238 const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
239 u8* staging_data = staging.mapped_span.data();
240 const size_t quad_size = bytes_per_index * 6;
241 for (u32 first = 0; first < num_first_offset_copies; ++first) {
242 for (u32 quad = 0; quad < num_quads; ++quad) {
243 switch (quad_array_lut_index_type) {
244 case VK_INDEX_TYPE_UINT8_EXT:
245 std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
246 break;
247 case VK_INDEX_TYPE_UINT16:
248 std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
249 break;
250 case VK_INDEX_TYPE_UINT32:
251 std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
252 break;
253 default:
254 UNREACHABLE();
255 break;
256 }
257 staging_data += quad_size;
258 }
259 }
260 scheduler.RequestOutsideRenderPassOperationContext();
261 scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
262 dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
263 const VkBufferCopy copy{
264 .srcOffset = src_offset,
265 .dstOffset = 0,
266 .size = size_bytes,
267 };
268 const VkBufferMemoryBarrier write_barrier{
269 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
270 .pNext = nullptr,
271 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
272 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
273 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
274 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
275 .buffer = dst_buffer,
276 .offset = 0,
277 .size = size_bytes,
278 };
279 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
280 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
281 0, write_barrier);
282 });
178}
179
180VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
181    size = std::max(size, std::size_t(4));
182    const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
283}
284
285void BufferCacheRuntime::ReserveNullIndexBuffer() {
286    if (null_index_buffer) {
287        return;
288 }
289 null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
290 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
291 .pNext = nullptr,
292 .flags = 0,
293 .size = 4,
294 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
295 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
296 .queueFamilyIndexCount = 0,
297 .pQueueFamilyIndices = nullptr,
298 });
299 if (device.HasDebuggingToolAttached()) {
300 null_index_buffer.SetObjectNameEXT("Null index buffer");
301 }
302 null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal);
303
183    scheduler.RequestOutsideRenderPassOperationContext();
184    scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
185        cmdbuf.FillBuffer(buffer, 0, size, 0);
186    });
187    return {empty.buffer, 0, 0};
188}
304    scheduler.RequestOutsideRenderPassOperationContext();
305    scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) {
306        cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
307    });
308}
189 309
190} // namespace Vulkan 310} // namespace Vulkan
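For context on the LUT fill above: the staging loop in ReserveQuadArrayLUT calls MakeQuadIndices<T>(quad, first), a helper this diff uses but does not show. A minimal sketch of what it plausibly computes, assuming the conventional quad-to-triangle split (two triangles with vertices 0-1-2 and 0-2-3, and the `first` offset baked into each LUT copy); this is a hypothetical reconstruction, not the committed helper:

    #include <array>

    // Quad N occupies vertices first + 4 * N .. first + 4 * N + 3.
    template <typename T>
    std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
        std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
        for (T& index : indices) {
            index = static_cast<T>(first + 4 * quad + index);
        }
        return indices;
    }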
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 41d577510..041e6515c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -4,69 +4,124 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
8
9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 7#include "video_core/buffer_cache/buffer_cache.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/vk_compute_pass.h"
11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
12#include "video_core/renderer_vulkan/vk_stream_buffer.h"
13#include "video_core/vulkan_common/vulkan_memory_allocator.h" 11#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 12#include "video_core/vulkan_common/vulkan_wrapper.h"
15 13
16namespace Vulkan { 14namespace Vulkan {
17 15
18class Device; 16class Device;
17class VKDescriptorPool;
19class VKScheduler; 18class VKScheduler;
19class VKUpdateDescriptorQueue;
20 20
21class Buffer final : public VideoCommon::BufferBlock {
22public:
23    explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler,
24                    StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
25    ~Buffer();
26
27    void Upload(std::size_t offset, std::size_t data_size, const u8* data);
28
29    void Download(std::size_t offset, std::size_t data_size, u8* data);
30
31    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
32                  std::size_t copy_size);
33
34    VkBuffer Handle() const {
35        return *buffer;
36    }
37
38    u64 Address() const {
39        return 0;
40    }
21class BufferCacheRuntime;
22
23class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
24public:
25    explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
26    explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27                    VAddr cpu_addr_, u64 size_bytes_);
28
29    [[nodiscard]] VkBuffer Handle() const noexcept {
30        return *buffer;
31    }
32
33    operator VkBuffer() const noexcept {
34        return *buffer;
35    }
41 36
42private: 37private:
43 const Device& device;
44 VKScheduler& scheduler;
45 StagingBufferPool& staging_pool;
46
47 vk::Buffer buffer; 38 vk::Buffer buffer;
48 MemoryCommit commit; 39 MemoryCommit commit;
49}; 40};
50 41
51class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
52public:
53    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
54                           Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
55                           const Device& device, MemoryAllocator& memory_allocator,
56                           VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
57                           StagingBufferPool& staging_pool);
58    ~VKBufferCache();
42class BufferCacheRuntime {
43    friend Buffer;
44
45    using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
46    using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat;
47
48public:
49    explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
50                                VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
51                                VKUpdateDescriptorQueue& update_descriptor_queue_,
52                                VKDescriptorPool& descriptor_pool);
53
54    void Finish();
55
56 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
57
58 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
59
60 void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
61 std::span<const VideoCommon::BufferCopy> copies);
62
63 void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
64 u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
59 65
60    BufferInfo GetEmptyBuffer(std::size_t size) override;
61
62protected:
63    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
66    void BindQuadArrayIndexBuffer(u32 first, u32 count);
67
68    void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
69
70 void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
71
72 std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
73 [[maybe_unused]] u32 binding_index, u32 size) {
74 const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
75 BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
76 return ref.mapped_span;
77 }
78
79 void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
80 BindBuffer(buffer, offset, size);
81 }
82
83 void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
84 [[maybe_unused]] bool is_written) {
85 BindBuffer(buffer, offset, size);
86 }
64 87
65private: 88private:
89 void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
90
91 void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
92
93 void ReserveNullIndexBuffer();
94
66 const Device& device; 95 const Device& device;
67 MemoryAllocator& memory_allocator; 96 MemoryAllocator& memory_allocator;
68 VKScheduler& scheduler; 97 VKScheduler& scheduler;
69 StagingBufferPool& staging_pool; 98 StagingBufferPool& staging_pool;
99 VKUpdateDescriptorQueue& update_descriptor_queue;
100
101 vk::Buffer quad_array_lut;
102 MemoryCommit quad_array_lut_commit;
103 VkIndexType quad_array_lut_index_type{};
104 u32 current_num_indices = 0;
105
106 vk::Buffer null_index_buffer;
107 MemoryCommit null_index_buffer_commit;
108
109 Uint8Pass uint8_pass;
110 QuadIndexedPass quad_index_pass;
70}; 111};
71 112
113struct BufferCacheParams {
114 using Runtime = Vulkan::BufferCacheRuntime;
115 using Buffer = Vulkan::Buffer;
116
117 static constexpr bool IS_OPENGL = false;
118 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
119 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
120 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
121 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
122 static constexpr bool USE_MEMORY_MAPS = true;
123};
124
125using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
126
72} // namespace Vulkan 127} // namespace Vulkan
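The new BufferCacheParams struct above is a compile-time policy: the shared VideoCommon::BufferCache template reads these constexpr flags from its parameter, so each backend selects code paths with if constexpr rather than virtual dispatch. A minimal sketch of the pattern, with illustrative names rather than the real template:

    template <class P>
    class GenericBufferCache {
    public:
        void Upload() {
            if constexpr (P::USE_MEMORY_MAPS) {
                // Vulkan-style path: copy through mapped staging memory
            } else {
                // OpenGL-style path: stream-buffer style uploads
            }
        }
    };

    struct ExampleParams {
        static constexpr bool USE_MEMORY_MAPS = true;
    };
    using ExampleCache = GenericBufferCache<ExampleParams>;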
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 5eb6a54be..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,7 +10,7 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" 13#include "common/div_ceil.h"
14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -22,30 +22,7 @@
22#include "video_core/vulkan_common/vulkan_wrapper.h" 22#include "video_core/vulkan_common/vulkan_wrapper.h"
23 23
24namespace Vulkan { 24namespace Vulkan {
25
26namespace { 25namespace {
27
28VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
29 return {
30 .binding = 0,
31 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
32 .descriptorCount = 1,
33 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
34 .pImmutableSamplers = nullptr,
35 };
36}
37
38VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
39 return {
40 .dstBinding = 0,
41 .dstArrayElement = 0,
42 .descriptorCount = 1,
43 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
44 .offset = 0,
45 .stride = sizeof(DescriptorUpdateEntry),
46 };
47}
48
49VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 26VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
50 return { 27 return {
51 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 28 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
162 return set; 139 return set;
163} 140}
164 141
165QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
166 VKDescriptorPool& descriptor_pool_,
167 StagingBufferPool& staging_buffer_pool_,
168 VKUpdateDescriptorQueue& update_descriptor_queue_)
169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
173 update_descriptor_queue{update_descriptor_queue_} {}
174
175QuadArrayPass::~QuadArrayPass() = default;
176
177std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
178 const u32 num_triangle_vertices = (num_vertices / 4) * 6;
179 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
180 const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
181
182 update_descriptor_queue.Acquire();
183 update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
184 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
185
186 scheduler.RequestOutsideRenderPassOperationContext();
187
188 ASSERT(num_vertices % 4 == 0);
189 const u32 num_quads = num_vertices / 4;
190 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer,
191 num_quads, first, set](vk::CommandBuffer cmdbuf) {
192 constexpr u32 dispatch_size = 1024;
193 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
195 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
196 cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
197
198 VkBufferMemoryBarrier barrier;
199 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
200 barrier.pNext = nullptr;
201 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
202 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
203 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
204 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
205 barrier.buffer = buffer;
206 barrier.offset = 0;
207 barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
208 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
209 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
210 });
211 return {staging_ref.buffer, 0};
212}
213
214Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, 142Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
215 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, 143 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
216 VKUpdateDescriptorQueue& update_descriptor_queue_) 144 VKUpdateDescriptorQueue& update_descriptor_queue_)
@@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
221 149
222Uint8Pass::~Uint8Pass() = default; 150Uint8Pass::~Uint8Pass() = default;
223 151
224std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
225                                              u64 src_offset) {
152std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
153                                                      u32 src_offset) {
226 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); 154 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
227    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
155    const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
228 156
229 update_descriptor_queue.Acquire(); 157 update_descriptor_queue.Acquire();
230 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 158 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
231    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
159    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
232 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 160 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
233 161
234 scheduler.RequestOutsideRenderPassOperationContext(); 162 scheduler.RequestOutsideRenderPassOperationContext();
235    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
236                      num_vertices](vk::CommandBuffer cmdbuf) {
237        constexpr u32 dispatch_size = 1024;
163    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
164                      num_vertices](vk::CommandBuffer cmdbuf) {
165        static constexpr u32 DISPATCH_SIZE = 1024;
166 static constexpr VkMemoryBarrier WRITE_BARRIER{
167 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
168 .pNext = nullptr,
169 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
170 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
171 };
238 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 172 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
239 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 173 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
240        cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
174        cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
241
242 VkBufferMemoryBarrier barrier;
243 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
244 barrier.pNext = nullptr;
245 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
246 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
247 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
248 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
249 barrier.buffer = buffer;
250 barrier.offset = 0;
251 barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
252 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 175 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
253                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
176                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
254 }); 177 });
255    return {staging_ref.buffer, 0};
178    return {staging.buffer, staging.offset};
256} 179}
257 180
258QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 181QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
267 190
268QuadIndexedPass::~QuadIndexedPass() = default; 191QuadIndexedPass::~QuadIndexedPass() = default;
269 192
270std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
193std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
271 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, 194 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
272    VkBuffer src_buffer, u64 src_offset) {
195    VkBuffer src_buffer, u32 src_offset) {
273 const u32 index_shift = [index_format] { 196 const u32 index_shift = [index_format] {
274 switch (index_format) { 197 switch (index_format) {
275 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: 198 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
286 const u32 num_tri_vertices = (num_vertices / 4) * 6; 209 const u32 num_tri_vertices = (num_vertices / 4) * 6;
287 210
288 const std::size_t staging_size = num_tri_vertices * sizeof(u32); 211 const std::size_t staging_size = num_tri_vertices * sizeof(u32);
289    const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
212    const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
290 213
291 update_descriptor_queue.Acquire(); 214 update_descriptor_queue.Acquire();
292 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 215 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
293    update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
216    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
294 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 217 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
295 218
296 scheduler.RequestOutsideRenderPassOperationContext(); 219 scheduler.RequestOutsideRenderPassOperationContext();
297    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set,
298                      num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
299        static constexpr u32 dispatch_size = 1024;
220    scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
221                      num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
222        static constexpr u32 DISPATCH_SIZE = 1024;
223 static constexpr VkMemoryBarrier WRITE_BARRIER{
224 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
225 .pNext = nullptr,
226 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
227 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
228 };
300 const std::array push_constants = {base_vertex, index_shift}; 229 const std::array push_constants = {base_vertex, index_shift};
301 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 230 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
302 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 231 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
303 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 232 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
304 &push_constants); 233 &push_constants);
305        cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
234        cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
306
307 VkBufferMemoryBarrier barrier;
308 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
309 barrier.pNext = nullptr;
310 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
311 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
312 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
313 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
314 barrier.buffer = buffer;
315 barrier.offset = 0;
316 barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
317 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 235 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
318                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
236                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
319 }); 237 });
320    return {staging_ref.buffer, 0};
238    return {staging.buffer, staging.offset};
321} 239}
322 240
323} // namespace Vulkan 241} // namespace Vulkan
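A note on the AlignUp-to-DivCeil change above: both expressions compute how many 1024-wide workgroups a dispatch needs, but DivCeil states the intent directly. Assuming common/div_ceil.h provides the usual rounding-up integer division, it is equivalent to:

    // Sketch of a rounding-up division for unsigned operands.
    template <typename N, typename D>
    constexpr N DivCeil(N number, D divisor) {
        return static_cast<N>((number + divisor - 1) / divisor);
    }
    // Example: DivCeil(1500u, 1024u) == 2, so 1500 vertices dispatch two workgroups.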
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index f5c6f5f17..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -41,22 +41,6 @@ private:
41 vk::ShaderModule module; 41 vk::ShaderModule module;
42}; 42};
43 43
44class QuadArrayPass final : public VKComputePass {
45public:
46 explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
47 VKDescriptorPool& descriptor_pool_,
48 StagingBufferPool& staging_buffer_pool_,
49 VKUpdateDescriptorQueue& update_descriptor_queue_);
50 ~QuadArrayPass();
51
52 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
53
54private:
55 VKScheduler& scheduler;
56 StagingBufferPool& staging_buffer_pool;
57 VKUpdateDescriptorQueue& update_descriptor_queue;
58};
59
60class Uint8Pass final : public VKComputePass { 44class Uint8Pass final : public VKComputePass {
61public: 45public:
62 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 46 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
@@ -64,7 +48,10 @@ public:
64 VKUpdateDescriptorQueue& update_descriptor_queue_); 48 VKUpdateDescriptorQueue& update_descriptor_queue_);
65 ~Uint8Pass(); 49 ~Uint8Pass();
66 50
67    std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
51    /// Assemble uint8 indices into a uint16 index buffer
52 /// Returns a pair with the staging buffer, and the offset where the assembled data is
53 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
54 u32 src_offset);
68 55
69private: 56private:
70 VKScheduler& scheduler; 57 VKScheduler& scheduler;
@@ -80,9 +67,9 @@ public:
80 VKUpdateDescriptorQueue& update_descriptor_queue_); 67 VKUpdateDescriptorQueue& update_descriptor_queue_);
81 ~QuadIndexedPass(); 68 ~QuadIndexedPass();
82 69
83    std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
84                                      u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
85                                      u64 src_offset);
70    std::pair<VkBuffer, VkDeviceSize> Assemble(
71        Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
72        u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
86 73
87private: 74private:
88 VKScheduler& scheduler; 75 VKScheduler& scheduler;
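What Uint8Pass::Assemble computes on the GPU can be stated in a few lines of CPU code: widen each 8-bit index to 16 bits, since Vulkan only guarantees uint16/uint32 index buffers unless VK_EXT_index_type_uint8 is available. A CPU-side equivalent, for illustration only:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::uint16_t> WidenUint8Indices(const std::uint8_t* src, std::size_t count) {
        std::vector<std::uint16_t> dst(count);
        for (std::size_t i = 0; i < count; ++i) {
            dst[i] = src[i]; // zero-extend u8 -> u16
        }
        return dst;
    }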
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 6cd00884d..3bec48d14 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -45,8 +45,8 @@ void InnerFence::Wait() {
45} 45}
46 46
47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 47VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
48 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, 48 TextureCache& texture_cache_, BufferCache& buffer_cache_,
49 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 49 VKQueryCache& query_cache_, const Device& device_,
50 VKScheduler& scheduler_) 50 VKScheduler& scheduler_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
52 scheduler{scheduler_} {} 52 scheduler{scheduler_} {}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 9c5e5aa8f..2f8322d29 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -22,7 +22,6 @@ class RasterizerInterface;
22namespace Vulkan { 22namespace Vulkan {
23 23
24class Device; 24class Device;
25class VKBufferCache;
26class VKQueryCache; 25class VKQueryCache;
27class VKScheduler; 26class VKScheduler;
28 27
@@ -45,14 +44,14 @@ private:
45using Fence = std::shared_ptr<InnerFence>; 44using Fence = std::shared_ptr<InnerFence>;
46 45
47using GenericFenceManager = 46using GenericFenceManager =
48    VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
47    VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>;
49 48
50class VKFenceManager final : public GenericFenceManager { 49class VKFenceManager final : public GenericFenceManager {
51public: 50public:
52    explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
53                            Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
54                            VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
55                            VKScheduler& scheduler_);
51    explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
52                            TextureCache& texture_cache, BufferCache& buffer_cache,
53                            VKQueryCache& query_cache, const Device& device,
54                            VKScheduler& scheduler);
56 55
57protected: 56protected:
58 Fence CreateFence(u32 value, bool is_stubbed) override; 57 Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index f336f1862..2c7ed654d 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -21,7 +21,12 @@ public:
21 21
22 /// Returns the current logical tick. 22 /// Returns the current logical tick.
23 [[nodiscard]] u64 CurrentTick() const noexcept { 23 [[nodiscard]] u64 CurrentTick() const noexcept {
24        return current_tick;
24        return current_tick.load(std::memory_order_relaxed);
25 }
26
27 /// Returns the last known GPU tick.
28 [[nodiscard]] u64 KnownGpuTick() const noexcept {
29 return gpu_tick.load(std::memory_order_relaxed);
25 } 30 }
26 31
27 /// Returns the timeline semaphore handle. 32 /// Returns the timeline semaphore handle.
@@ -31,7 +36,7 @@ public:
31 36
32 /// Returns true when a tick has been hit by the GPU. 37 /// Returns true when a tick has been hit by the GPU.
33 [[nodiscard]] bool IsFree(u64 tick) { 38 [[nodiscard]] bool IsFree(u64 tick) {
34        return gpu_tick >= tick;
39        return gpu_tick.load(std::memory_order_relaxed) >= tick;
35 } 40 }
36 41
37 /// Advance to the logical tick. 42 /// Advance to the logical tick.
@@ -41,7 +46,7 @@ public:
41 46
42 /// Refresh the known GPU tick 47 /// Refresh the known GPU tick
43 void Refresh() { 48 void Refresh() {
44        gpu_tick = semaphore.GetCounter();
49        gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed);
45 } 50 }
46 51
47 /// Waits for a tick to be hit on the GPU 52 /// Waits for a tick to be hit on the GPU
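The hunk above turns the tick counters into atomics read and written with std::memory_order_relaxed. Relaxed ordering suffices because each value is a monotonic counter and a stale read only makes IsFree conservatively report false; it never claims an unsignaled tick. The pattern in isolation, as a sketch rather than the full class:

    #include <atomic>
    #include <cstdint>

    class TickWatcher {
    public:
        // May miss a just-signaled tick; never reports an unsignaled one as free.
        bool IsFree(std::uint64_t tick) const noexcept {
            return gpu_tick.load(std::memory_order_relaxed) >= tick;
        }
        void Refresh(std::uint64_t counter) noexcept {
            gpu_tick.store(counter, std::memory_order_relaxed);
        }
    private:
        std::atomic<std::uint64_t> gpu_tick{0};
    };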
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f0a111829..684d4e3a6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -8,8 +8,6 @@
8#include <mutex> 8#include <mutex>
9#include <vector> 9#include <vector>
10 10
11#include <boost/container/static_vector.hpp>
12
13#include "common/alignment.h" 11#include "common/alignment.h"
14#include "common/assert.h" 12#include "common/assert.h"
15#include "common/logging/log.h" 13#include "common/logging/log.h"
@@ -24,7 +22,6 @@
24#include "video_core/renderer_vulkan/maxwell_to_vk.h" 22#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h" 23#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25
50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); 48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); 49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
57MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
58MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
59MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
60 51
61namespace { 52namespace {
53struct DrawParams {
54 u32 base_instance;
55 u32 num_instances;
56 u32 base_vertex;
57 u32 num_vertices;
58 bool is_indexed;
59};
62 60
63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); 61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
64 62
@@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
67 const float width = src.scale_x * 2.0f; 65 const float width = src.scale_x * 2.0f;
68 const float height = src.scale_y * 2.0f; 66 const float height = src.scale_y * 2.0f;
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; 67 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
70
71 VkViewport viewport{ 68 VkViewport viewport{
72 .x = src.translate_x - src.scale_x, 69 .x = src.translate_x - src.scale_x,
73 .y = src.translate_y - src.scale_y, 70 .y = src.translate_y - src.scale_y,
@@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
76 .minDepth = src.translate_z - src.scale_z * reduce_z, 73 .minDepth = src.translate_z - src.scale_z * reduce_z,
77 .maxDepth = src.translate_z + src.scale_z, 74 .maxDepth = src.translate_z + src.scale_z,
78 }; 75 };
79
80 if (!device.IsExtDepthRangeUnrestrictedSupported()) { 76 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
81 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); 77 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
82 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); 78 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
83 } 79 }
84
85 return viewport; 80 return viewport;
86} 81}
87 82
@@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); 141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
147} 142}
148 143
149template <size_t N>
150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
151 std::array<VkDeviceSize, N> expanded;
152 std::copy(strides.begin(), strides.end(), expanded.begin());
153 return expanded;
154}
155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { 144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) { 145 if (entry.is_buffer) {
158 return ImageViewType::e2D; 146 return ImageViewType::e2D;
@@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca
221 } 209 }
222} 210}
223 211
212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
213                          bool is_indexed) {
214    DrawParams params{
215        .base_instance = regs.vb_base_instance,
216        .num_instances = is_instanced ? num_instances : 1,
217        .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
218        .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
219        .is_indexed = is_indexed,
220    };
221    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
222        // 6 triangle vertices per quad, base vertex is part of the index
223        // See BindQuadArrayIndexBuffer for more details
224        params.num_vertices = (params.num_vertices / 4) * 6;
225        params.base_vertex = 0;
226        params.is_indexed = true;
224} // Anonymous namespace
225
226class BufferBindings final {
227public:
228    void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {
229        vertex.buffers[vertex.num_buffers] = buffer;
230        vertex.offsets[vertex.num_buffers] = offset;
231        vertex.sizes[vertex.num_buffers] = size;
232        vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);
233        ++vertex.num_buffers;
234    }
235
236    void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
237        index.buffer = buffer;
238        index.offset = offset;
239        index.type = type;
240    }
241
242 void Bind(const Device& device, VKScheduler& scheduler) const {
243 // Use this large switch case to avoid dispatching more memory in the record lambda than
244 // what we need. It looks horrible, but it's the best we can do on standard C++.
245 switch (vertex.num_buffers) {
246 case 0:
247 return BindStatic<0>(device, scheduler);
248 case 1:
249 return BindStatic<1>(device, scheduler);
250 case 2:
251 return BindStatic<2>(device, scheduler);
252 case 3:
253 return BindStatic<3>(device, scheduler);
254 case 4:
255 return BindStatic<4>(device, scheduler);
256 case 5:
257 return BindStatic<5>(device, scheduler);
258 case 6:
259 return BindStatic<6>(device, scheduler);
260 case 7:
261 return BindStatic<7>(device, scheduler);
262 case 8:
263 return BindStatic<8>(device, scheduler);
264 case 9:
265 return BindStatic<9>(device, scheduler);
266 case 10:
267 return BindStatic<10>(device, scheduler);
268 case 11:
269 return BindStatic<11>(device, scheduler);
270 case 12:
271 return BindStatic<12>(device, scheduler);
272 case 13:
273 return BindStatic<13>(device, scheduler);
274 case 14:
275 return BindStatic<14>(device, scheduler);
276 case 15:
277 return BindStatic<15>(device, scheduler);
278 case 16:
279 return BindStatic<16>(device, scheduler);
280 case 17:
281 return BindStatic<17>(device, scheduler);
282 case 18:
283 return BindStatic<18>(device, scheduler);
284 case 19:
285 return BindStatic<19>(device, scheduler);
286 case 20:
287 return BindStatic<20>(device, scheduler);
288 case 21:
289 return BindStatic<21>(device, scheduler);
290 case 22:
291 return BindStatic<22>(device, scheduler);
292 case 23:
293 return BindStatic<23>(device, scheduler);
294 case 24:
295 return BindStatic<24>(device, scheduler);
296 case 25:
297 return BindStatic<25>(device, scheduler);
298 case 26:
299 return BindStatic<26>(device, scheduler);
300 case 27:
301 return BindStatic<27>(device, scheduler);
302 case 28:
303 return BindStatic<28>(device, scheduler);
304 case 29:
305 return BindStatic<29>(device, scheduler);
306 case 30:
307 return BindStatic<30>(device, scheduler);
308 case 31:
309 return BindStatic<31>(device, scheduler);
310 case 32:
311 return BindStatic<32>(device, scheduler);
312 }
313 UNREACHABLE();
314 }
315
316private:
317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
318 struct {
319 size_t num_buffers = 0;
320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
323 std::array<u16, Maxwell::NumVertexArrays> strides;
324 } vertex;
325
326 struct {
327 VkBuffer buffer = nullptr;
328 VkDeviceSize offset;
329 VkIndexType type;
330 } index;
331
332 template <size_t N>
333 void BindStatic(const Device& device, VKScheduler& scheduler) const {
334 if (device.IsExtExtendedDynamicStateSupported()) {
335 if (index.buffer) {
336 BindStatic<N, true, true>(scheduler);
337 } else {
338 BindStatic<N, false, true>(scheduler);
339 }
340 } else {
341 if (index.buffer) {
342 BindStatic<N, true, false>(scheduler);
343 } else {
344 BindStatic<N, false, false>(scheduler);
345 }
346 }
347 }
348
349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
350 void BindStatic(VKScheduler& scheduler) const {
351 static_assert(N <= Maxwell::NumVertexArrays);
352 if constexpr (N == 0) {
353 return;
354 }
355
356 std::array<VkBuffer, N> buffers;
357 std::array<VkDeviceSize, N> offsets;
358 std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
359 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
360
361 if constexpr (has_extended_dynamic_state) {
362 // With extended dynamic states we can specify the length and stride of a vertex buffer
363 std::array<VkDeviceSize, N> sizes;
364 std::array<u16, N> strides;
365 std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin());
366 std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin());
367
368 if constexpr (is_indexed) {
369 scheduler.Record(
370 [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) {
371 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
372 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
373 offsets.data(), sizes.data(),
374 ExpandStrides(strides).data());
375 });
376 } else {
377 scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) {
378 cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(),
379 offsets.data(), sizes.data(),
380 ExpandStrides(strides).data());
381 });
382 }
383 return;
384 }
385
386 if constexpr (is_indexed) {
387 // Indexed draw
388 scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
389 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
390 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
391 });
392 } else {
393 // Array draw
394 scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
395 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
396 });
397 }
398 }
399};
400
401void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
402 if (is_indexed) {
403 cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
404 } else {
405 cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
406    }
407}
227    }
228    return params;
229}
230} // Anonymous namespace
408 231
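Worked example of the Quads branch in MakeDrawParams above: a guest draw of 8 quad vertices (2 quads) becomes an indexed draw of (8 / 4) * 6 = 12 indices against the quad LUT, with base_vertex folded into the LUT indices instead of the draw call:

    // Illustrative check of the quad-to-triangle vertex count conversion.
    constexpr u32 QuadVerticesToTriangleIndices(u32 num_vertices) {
        return (num_vertices / 4) * 6;
    }
    static_assert(QuadVerticesToTriangleIndices(8) == 12);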
409RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 232RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
410 Tegra::MemoryManager& gpu_memory_, 233 Tegra::MemoryManager& gpu_memory_,
@@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, 237 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, 238 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
416 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, 239 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
417      state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
240      state_tracker{state_tracker_}, scheduler{scheduler_},
418 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), 241 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
419 update_descriptor_queue(device, scheduler), 242 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool), 243 blit_image(device, scheduler, state_tracker, descriptor_pool),
421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
424 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, 244 texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 245 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
246 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
247 update_descriptor_queue, descriptor_pool),
248 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 249 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
427 descriptor_pool, update_descriptor_queue), 250 descriptor_pool, update_descriptor_queue),
428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler,
429 stream_buffer, staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, 251 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
432      fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler),
252      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
432 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 253 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
433 scheduler.SetQueryCache(query_cache); 254 scheduler.SetQueryCache(query_cache);
434 if (device.UseAsynchronousShaders()) { 255 if (device.UseAsynchronousShaders()) {
@@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
449 GraphicsPipelineCacheKey key; 270 GraphicsPipelineCacheKey key;
450 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); 271 key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
451 272
452    buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
273    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
453
454 BufferBindings buffer_bindings;
455 const DrawParameters draw_params =
456 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
457 274
458 auto lock = texture_cache.AcquireLock();
459 texture_cache.SynchronizeGraphicsDescriptors(); 275 texture_cache.SynchronizeGraphicsDescriptors();
460
461 texture_cache.UpdateRenderTargets(false); 276 texture_cache.UpdateRenderTargets(false);
462 277
463 const auto shaders = pipeline_cache.GetShaders(); 278 const auto shaders = pipeline_cache.GetShaders();
464 key.shaders = GetShaderAddresses(shaders); 279 key.shaders = GetShaderAddresses(shaders);
465    SetupShaderDescriptors(shaders);
280    SetupShaderDescriptors(shaders, is_indexed);
466
467 buffer_cache.Unmap();
468 281
469 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 282 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
470 key.renderpass = framebuffer->RenderPass(); 283 key.renderpass = framebuffer->RenderPass();
@@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
476 return; 289 return;
477 } 290 }
478 291
479 buffer_bindings.Bind(device, scheduler);
480
481 BeginTransformFeedback(); 292 BeginTransformFeedback();
482 293
483 scheduler.RequestRenderpass(framebuffer); 294 scheduler.RequestRenderpass(framebuffer);
484 scheduler.BindGraphicsPipeline(pipeline->GetHandle()); 295 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
485 UpdateDynamicStates(); 296 UpdateDynamicStates();
486 297
487    const auto pipeline_layout = pipeline->GetLayout();
488    const auto descriptor_set = pipeline->CommitDescriptorSet();
298    const auto& regs = maxwell3d.regs;
299    const u32 num_instances = maxwell3d.mme_draw.instance_count;
300 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
301 const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
302 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
489 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 303 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
490 if (descriptor_set) { 304 if (descriptor_set) {
491 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 305 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
492                                      DESCRIPTOR_SET, descriptor_set, {});
306                                      DESCRIPTOR_SET, descriptor_set, nullptr);
307 }
308 if (draw_params.is_indexed) {
309 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
310 draw_params.base_vertex, draw_params.base_instance);
311 } else {
312 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance);
493 } 314 }
494 draw_params.Draw(cmdbuf);
495 }); 315 });
496 316
497 EndTransformFeedback(); 317 EndTransformFeedback();
@@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() {
515 return; 335 return;
516 } 336 }
517 337
518    auto lock = texture_cache.AcquireLock();
338    std::scoped_lock lock{texture_cache.mutex};
519 texture_cache.UpdateRenderTargets(true); 339 texture_cache.UpdateRenderTargets(true);
520 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); 340 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
521 const VkExtent2D render_area = framebuffer->RenderArea(); 341 const VkExtent2D render_area = framebuffer->RenderArea();
@@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() {
559 if (use_stencil) { 379 if (use_stencil) {
560 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; 380 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
561 } 381 }
562
563 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, 382 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
564 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { 383 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
565 VkClearAttachment attachment; 384 VkClearAttachment attachment;
@@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
580 auto& pipeline = pipeline_cache.GetComputePipeline({ 399 auto& pipeline = pipeline_cache.GetComputePipeline({
581 .shader = code_addr, 400 .shader = code_addr,
582 .shared_memory_size = launch_desc.shared_alloc, 401 .shared_memory_size = launch_desc.shared_alloc,
583        .workgroup_size =
584            {
585                launch_desc.block_dim_x,
586                launch_desc.block_dim_y,
587                launch_desc.block_dim_z,
588            },
402        .workgroup_size{
403            launch_desc.block_dim_x,
404            launch_desc.block_dim_y,
405            launch_desc.block_dim_z,
406        },
589 }); 407 });
590 408
591 // Compute dispatches can't be executed inside a renderpass 409 // Compute dispatches can't be executed inside a renderpass
@@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
594 image_view_indices.clear(); 412 image_view_indices.clear();
595 sampler_handles.clear(); 413 sampler_handles.clear();
596 414
597    auto lock = texture_cache.AcquireLock();
415    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
598 texture_cache.SynchronizeComputeDescriptors();
599 416
600 const auto& entries = pipeline.GetEntries(); 417 const auto& entries = pipeline.GetEntries();
418 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
419 buffer_cache.UnbindComputeStorageBuffers();
420 u32 ssbo_index = 0;
421 for (const auto& buffer : entries.global_buffers) {
422 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
423 buffer.is_written);
424 ++ssbo_index;
425 }
426 buffer_cache.UpdateComputeBuffers();
427
428 texture_cache.SynchronizeComputeDescriptors();
429
601 SetupComputeUniformTexels(entries); 430 SetupComputeUniformTexels(entries);
602 SetupComputeTextures(entries); 431 SetupComputeTextures(entries);
603 SetupComputeStorageTexels(entries); 432 SetupComputeStorageTexels(entries);
@@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
606 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 435 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
607 texture_cache.FillComputeImageViews(indices_span, image_view_ids); 436 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
608 437
609 buffer_cache.Map(CalculateComputeStreamBufferSize());
610
611 update_descriptor_queue.Acquire(); 438 update_descriptor_queue.Acquire();
612 439
613    SetupComputeConstBuffers(entries);
440    buffer_cache.BindHostComputeBuffers();
614 SetupComputeGlobalBuffers(entries);
615 441
616 ImageViewId* image_view_id_ptr = image_view_ids.data(); 442 ImageViewId* image_view_id_ptr = image_view_ids.data();
617 VkSampler* sampler_ptr = sampler_handles.data(); 443 VkSampler* sampler_ptr = sampler_handles.data();
618 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, 444 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
619 sampler_ptr); 445 sampler_ptr);
620 446
621 buffer_cache.Unmap();
622
623 const VkPipeline pipeline_handle = pipeline.GetHandle(); 447 const VkPipeline pipeline_handle = pipeline.GetHandle();
624 const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); 448 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
625 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); 449 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
@@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
644 query_cache.Query(gpu_addr, type, timestamp); 468 query_cache.Query(gpu_addr, type, timestamp);
645} 469}
646 470
471void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
472 u32 size) {
473 buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
474}
475
647void RasterizerVulkan::FlushAll() {} 476void RasterizerVulkan::FlushAll() {}
648 477
649void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { 478void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
@@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
651 return; 480 return;
652 } 481 }
653 { 482 {
654        auto lock = texture_cache.AcquireLock();
483        std::scoped_lock lock{texture_cache.mutex};
655 texture_cache.DownloadMemory(addr, size); 484 texture_cache.DownloadMemory(addr, size);
656 } 485 }
657    buffer_cache.FlushRegion(addr, size);
486    {
487 std::scoped_lock lock{buffer_cache.mutex};
488 buffer_cache.DownloadMemory(addr, size);
489 }
658 query_cache.FlushRegion(addr, size); 490 query_cache.FlushRegion(addr, size);
659} 491}
660 492
661bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { 493bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
494 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
662 if (!Settings::IsGPULevelHigh()) { 495 if (!Settings::IsGPULevelHigh()) {
663        return buffer_cache.MustFlushRegion(addr, size);
496        return buffer_cache.IsRegionGpuModified(addr, size);
664 } 497 }
665 return texture_cache.IsRegionGpuModified(addr, size) || 498 return texture_cache.IsRegionGpuModified(addr, size) ||
666           buffer_cache.MustFlushRegion(addr, size);
499           buffer_cache.IsRegionGpuModified(addr, size);
667} 500}
668 501
669void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 502void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
@@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
671 return; 504 return;
672 } 505 }
673 { 506 {
674        auto lock = texture_cache.AcquireLock();
507        std::scoped_lock lock{texture_cache.mutex};
675 texture_cache.WriteMemory(addr, size); 508 texture_cache.WriteMemory(addr, size);
676 } 509 }
510 {
511 std::scoped_lock lock{buffer_cache.mutex};
512 buffer_cache.WriteMemory(addr, size);
513 }
677 pipeline_cache.InvalidateRegion(addr, size); 514 pipeline_cache.InvalidateRegion(addr, size);
678 buffer_cache.InvalidateRegion(addr, size);
679 query_cache.InvalidateRegion(addr, size); 515 query_cache.InvalidateRegion(addr, size);
680} 516}
681 517
@@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
683 if (addr == 0 || size == 0) { 519 if (addr == 0 || size == 0) {
684 return; 520 return;
685 } 521 }
522 pipeline_cache.OnCPUWrite(addr, size);
686 { 523 {
687        auto lock = texture_cache.AcquireLock();
524        std::scoped_lock lock{texture_cache.mutex};
688 texture_cache.WriteMemory(addr, size); 525 texture_cache.WriteMemory(addr, size);
689 } 526 }
690    pipeline_cache.OnCPUWrite(addr, size);
691    buffer_cache.OnCPUWrite(addr, size);
527    {
528        std::scoped_lock lock{buffer_cache.mutex};
529 buffer_cache.CachedWriteMemory(addr, size);
530 }
692} 531}
693 532
694void RasterizerVulkan::SyncGuestHost() { 533void RasterizerVulkan::SyncGuestHost() {
695 buffer_cache.SyncGuestHost();
696 pipeline_cache.SyncGuestHost(); 534 pipeline_cache.SyncGuestHost();
535 {
536 std::scoped_lock lock{buffer_cache.mutex};
537 buffer_cache.FlushCachedWrites();
538 }
697} 539}
698 540
699void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { 541void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
700 { 542 {
701 auto lock = texture_cache.AcquireLock(); 543 std::scoped_lock lock{texture_cache.mutex};
702 texture_cache.UnmapMemory(addr, size); 544 texture_cache.UnmapMemory(addr, size);
703 } 545 }
704 buffer_cache.OnCPUWrite(addr, size); 546 {
547 std::scoped_lock lock{buffer_cache.mutex};
548 buffer_cache.WriteMemory(addr, size);
549 }
705 pipeline_cache.OnCPUWrite(addr, size); 550 pipeline_cache.OnCPUWrite(addr, size);
706} 551}
707 552
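[Editor's note] OnCPUWrite switches from WriteMemory to CachedWriteMemory: the buffer cache only records the dirtied range and defers the invalidation until SyncGuestHost calls FlushCachedWrites, batching bursts of small CPU writes. A minimal sketch of that deferred-invalidation idea, assuming a simple pending-range list rather than the real cache bookkeeping:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Sketch of deferred invalidation: CachedWriteMemory only records the
    // dirtied range; FlushCachedWrites applies every pending range at once.
    class DeferredInvalidator {
    public:
        void CachedWriteMemory(std::uint64_t addr, std::uint64_t size) {
            pending.emplace_back(addr, size);
        }

        void FlushCachedWrites() {
            for (const auto& [addr, size] : pending) {
                WriteMemory(addr, size); // the immediate invalidation path
            }
            pending.clear();
        }

    private:
        void WriteMemory(std::uint64_t, std::uint64_t) {} // placeholder
        std::vector<std::pair<std::uint64_t, std::uint64_t>> pending;
    };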
@@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() {
774 draw_counter = 0; 619 draw_counter = 0;
775 update_descriptor_queue.TickFrame(); 620 update_descriptor_queue.TickFrame();
776 fence_manager.TickFrame(); 621 fence_manager.TickFrame();
777 buffer_cache.TickFrame();
778 staging_pool.TickFrame(); 622 staging_pool.TickFrame();
779 { 623 {
780 auto lock = texture_cache.AcquireLock(); 624 std::scoped_lock lock{texture_cache.mutex};
781 texture_cache.TickFrame(); 625 texture_cache.TickFrame();
782 } 626 }
627 {
628 std::scoped_lock lock{buffer_cache.mutex};
629 buffer_cache.TickFrame();
630 }
783} 631}
784 632
785bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 633bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
786 const Tegra::Engines::Fermi2D::Surface& dst, 634 const Tegra::Engines::Fermi2D::Surface& dst,
787 const Tegra::Engines::Fermi2D::Config& copy_config) { 635 const Tegra::Engines::Fermi2D::Config& copy_config) {
788 auto lock = texture_cache.AcquireLock(); 636 std::scoped_lock lock{texture_cache.mutex};
789 texture_cache.BlitImage(dst, src, copy_config); 637 texture_cache.BlitImage(dst, src, copy_config);
790 return true; 638 return true;
791} 639}
@@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
795 if (!framebuffer_addr) { 643 if (!framebuffer_addr) {
796 return false; 644 return false;
797 } 645 }
798 646 std::scoped_lock lock{texture_cache.mutex};
799 auto lock = texture_cache.AcquireLock();
800 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); 647 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
801 if (!image_view) { 648 if (!image_view) {
802 return false; 649 return false;
803 } 650 }
804
805 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 651 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
806 screen_info.width = image_view->size.width; 652 screen_info.width = image_view->size.width;
807 screen_info.height = image_view->size.height; 653 screen_info.height = image_view->size.height;
@@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() {
830 draw_counter = 0; 676 draw_counter = 0;
831} 677}
832 678
833RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
834 BufferBindings& buffer_bindings,
835 bool is_indexed,
836 bool is_instanced) {
837 MICROPROFILE_SCOPE(Vulkan_Geometry);
838
839 const auto& regs = maxwell3d.regs;
840
841 SetupVertexArrays(buffer_bindings);
842
843 const u32 base_instance = regs.vb_base_instance;
844 const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
845 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
846 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
847
848 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
849 SetupIndexBuffer(buffer_bindings, params, is_indexed);
850
851 return params;
852}
853
854void RasterizerVulkan::SetupShaderDescriptors( 679void RasterizerVulkan::SetupShaderDescriptors(
855 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 680 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
856 image_view_indices.clear(); 681 image_view_indices.clear();
857 sampler_handles.clear(); 682 sampler_handles.clear();
858 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 683 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
@@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors(
860 if (!shader) { 685 if (!shader) {
861 continue; 686 continue;
862 } 687 }
863 const auto& entries = shader->GetEntries(); 688 const ShaderEntries& entries = shader->GetEntries();
864 SetupGraphicsUniformTexels(entries, stage); 689 SetupGraphicsUniformTexels(entries, stage);
865 SetupGraphicsTextures(entries, stage); 690 SetupGraphicsTextures(entries, stage);
866 SetupGraphicsStorageTexels(entries, stage); 691 SetupGraphicsStorageTexels(entries, stage);
867 SetupGraphicsImages(entries, stage); 692 SetupGraphicsImages(entries, stage);
693
694 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
695 buffer_cache.UnbindGraphicsStorageBuffers(stage);
696 u32 ssbo_index = 0;
697 for (const auto& buffer : entries.global_buffers) {
698 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
699 buffer.cbuf_offset, buffer.is_written);
700 ++ssbo_index;
701 }
868 } 702 }
869 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 703 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
704 buffer_cache.UpdateGraphicsBuffers(is_indexed);
870 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); 705 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
871 706
707 buffer_cache.BindHostGeometryBuffers(is_indexed);
708
872 update_descriptor_queue.Acquire(); 709 update_descriptor_queue.Acquire();
873 710
874 ImageViewId* image_view_id_ptr = image_view_ids.data(); 711 ImageViewId* image_view_id_ptr = image_view_ids.data();
@@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
879 if (!shader) { 716 if (!shader) {
880 continue; 717 continue;
881 } 718 }
882 const auto& entries = shader->GetEntries(); 719 buffer_cache.BindHostStageBuffers(stage);
883 SetupGraphicsConstBuffers(entries, stage); 720 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
884 SetupGraphicsGlobalBuffers(entries, stage); 721 image_view_id_ptr, sampler_ptr);
885 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
886 sampler_ptr);
887 } 722 }
888} 723}
889 724
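[Editor's note] SetupShaderDescriptors no longer uploads const and global buffers itself; per stage it only declares bindings to the shared buffer cache (SetEnabledUniformBuffers, UnbindGraphicsStorageBuffers, BindGraphicsStorageBuffer), and the cache performs the uploads later in UpdateGraphicsBuffers / BindHostGeometryBuffers / BindHostStageBuffers. A condensed sketch of the declaration step from the hunk above, with hypothetical entry types standing in for ShaderEntries:

    #include <cstddef>
    #include <vector>

    // Hypothetical mirror of the per-stage declaration step: storage buffers
    // are rebound from scratch on every draw, numbered by their position in
    // the shader's global_buffers list.
    struct GlobalBuffer {
        unsigned cbuf_index;
        unsigned cbuf_offset;
        bool is_written;
    };

    template <typename BufferCache>
    void DeclareStageBuffers(BufferCache& buffer_cache, std::size_t stage,
                             unsigned enabled_uniform_buffers,
                             const std::vector<GlobalBuffer>& global_buffers) {
        buffer_cache.SetEnabledUniformBuffers(stage, enabled_uniform_buffers);
        buffer_cache.UnbindGraphicsStorageBuffers(stage);
        unsigned ssbo_index = 0;
        for (const GlobalBuffer& buffer : global_buffers) {
            buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
                                                   buffer.cbuf_offset, buffer.is_written);
            ++ssbo_index;
        }
    }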
@@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() {
916 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); 751 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
917 return; 752 return;
918 } 753 }
919
920 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 754 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
921 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 755 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
922 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 756 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
923 757 scheduler.Record(
924 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); 758 [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
925 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
926 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
927
928 const auto& binding = regs.tfb_bindings[0];
929 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
930 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
931
932 const GPUVAddr gpu_addr = binding.Address();
933 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
934 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
935
936 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
937 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
938 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
939 });
940} 759}
941 760
942void RasterizerVulkan::EndTransformFeedback() { 761void RasterizerVulkan::EndTransformFeedback() {
@@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() {
947 if (!device.IsExtTransformFeedbackSupported()) { 766 if (!device.IsExtTransformFeedbackSupported()) {
948 return; 767 return;
949 } 768 }
950
951 scheduler.Record( 769 scheduler.Record(
952 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 770 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
953} 771}
954 772
955void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
956 const auto& regs = maxwell3d.regs;
957
958 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
959 const auto& vertex_array = regs.vertex_array[index];
960 if (!vertex_array.IsEnabled()) {
961 continue;
962 }
963 const GPUVAddr start{vertex_array.StartAddress()};
964 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
965
966 ASSERT(end >= start);
967 const size_t size = end - start;
968 if (size == 0) {
969 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
970 continue;
971 }
972 const auto info = buffer_cache.UploadMemory(start, size);
973 buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);
974 }
975}
976
977void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
978 bool is_indexed) {
979 if (params.num_vertices == 0) {
980 return;
981 }
982 const auto& regs = maxwell3d.regs;
983 switch (regs.draw.topology) {
984 case Maxwell::PrimitiveTopology::Quads: {
985 if (!params.is_indexed) {
986 const auto [buffer, offset] =
987 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
988 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
989 params.base_vertex = 0;
990 params.num_vertices = params.num_vertices * 6 / 4;
991 params.is_indexed = true;
992 break;
993 }
994 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
995 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
996 VkBuffer buffer = info.handle;
997 u64 offset = info.offset;
998 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
999 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
1000
1001 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
1002 params.num_vertices = (params.num_vertices / 4) * 6;
1003 params.base_vertex = 0;
1004 break;
1005 }
1006 default: {
1007 if (!is_indexed) {
1008 break;
1009 }
1010 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
1011 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1012 VkBuffer buffer = info.handle;
1013 u64 offset = info.offset;
1014
1015 auto format = regs.index_array.format;
1016 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
1017 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
1018 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
1019 format = Maxwell::IndexFormat::UnsignedShort;
1020 }
1021
1022 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
1023 break;
1024 }
1025 }
1026}
1027
1028void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1029 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1030 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1031 for (const auto& entry : entries.const_buffers) {
1032 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
1033 }
1034}
1035
1036void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1037 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1038 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1039
1040 for (const auto& entry : entries.global_buffers) {
1041 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
1042 SetupGlobalBuffer(entry, addr);
1043 }
1044}
1045
1046void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { 773void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1047 MICROPROFILE_SCOPE(Vulkan_Textures);
1048 const auto& regs = maxwell3d.regs; 774 const auto& regs = maxwell3d.regs;
1049 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 775 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1050 for (const auto& entry : entries.uniform_texels) { 776 for (const auto& entry : entries.uniform_texels) {
@@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries,
1054} 780}
1055 781
1056void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { 782void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1057 MICROPROFILE_SCOPE(Vulkan_Textures);
1058 const auto& regs = maxwell3d.regs; 783 const auto& regs = maxwell3d.regs;
1059 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 784 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1060 for (const auto& entry : entries.samplers) { 785 for (const auto& entry : entries.samplers) {
@@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_
1070} 795}
1071 796
1072void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { 797void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1073 MICROPROFILE_SCOPE(Vulkan_Textures);
1074 const auto& regs = maxwell3d.regs; 798 const auto& regs = maxwell3d.regs;
1075 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 799 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1076 for (const auto& entry : entries.storage_texels) { 800 for (const auto& entry : entries.storage_texels) {
@@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries,
1080} 804}
1081 805
1082void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { 806void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1083 MICROPROFILE_SCOPE(Vulkan_Images);
1084 const auto& regs = maxwell3d.regs; 807 const auto& regs = maxwell3d.regs;
1085 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; 808 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1086 for (const auto& entry : entries.images) { 809 for (const auto& entry : entries.images) {
@@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t
1089 } 812 }
1090} 813}
1091 814
1092void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1093 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1094 const auto& launch_desc = kepler_compute.launch_description;
1095 for (const auto& entry : entries.const_buffers) {
1096 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1097 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1098 const Tegra::Engines::ConstBufferInfo info{
1099 .address = config.Address(),
1100 .size = config.size,
1101 .enabled = mask[entry.GetIndex()],
1102 };
1103 SetupConstBuffer(entry, info);
1104 }
1105}
1106
1107void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1108 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1109 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1110 for (const auto& entry : entries.global_buffers) {
1111 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
1112 SetupGlobalBuffer(entry, addr);
1113 }
1114}
1115
1116void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 815void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1117 MICROPROFILE_SCOPE(Vulkan_Textures);
1118 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 816 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1119 for (const auto& entry : entries.uniform_texels) { 817 for (const auto& entry : entries.uniform_texels) {
1120 const TextureHandle handle = 818 const TextureHandle handle =
@@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1124} 822}
1125 823
1126void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 824void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1127 MICROPROFILE_SCOPE(Vulkan_Textures);
1128 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 825 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1129 for (const auto& entry : entries.samplers) { 826 for (const auto& entry : entries.samplers) {
1130 for (size_t index = 0; index < entry.size; ++index) { 827 for (size_t index = 0; index < entry.size; ++index) {
@@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1139} 836}
1140 837
1141void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 838void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1142 MICROPROFILE_SCOPE(Vulkan_Textures);
1143 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 839 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1144 for (const auto& entry : entries.storage_texels) { 840 for (const auto& entry : entries.storage_texels) {
1145 const TextureHandle handle = 841 const TextureHandle handle =
@@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1149} 845}
1150 846
1151void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 847void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1152 MICROPROFILE_SCOPE(Vulkan_Images);
1153 const bool via_header_index = kepler_compute.launch_description.linked_tsc; 848 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1154 for (const auto& entry : entries.images) { 849 for (const auto& entry : entries.images) {
1155 const TextureHandle handle = 850 const TextureHandle handle =
@@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1158 } 853 }
1159} 854}
1160 855
1161void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1162 const Tegra::Engines::ConstBufferInfo& buffer) {
1163 if (!buffer.enabled) {
1164 // Set values to zero to unbind buffers
1165 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1166 return;
1167 }
1168 // Align the size to avoid bad std140 interactions
1169 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1170 ASSERT(size <= MaxConstbufferSize);
1171
1172 const u64 alignment = device.GetUniformBufferAlignment();
1173 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1174 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1175}
1176
1177void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
1178 const u64 actual_addr = gpu_memory.Read<u64>(address);
1179 const u32 size = gpu_memory.Read<u32>(address + 8);
1180
1181 if (size == 0) {
1182 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
1183 // because Vulkan doesn't like empty buffers.
1184 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1185 // default buffer.
1186 static constexpr size_t dummy_size = 4;
1187 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1188 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1189 return;
1190 }
1191
1192 const auto info = buffer_cache.UploadMemory(
1193 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1194 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1195}
1196
1197void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 856void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1198 if (!state_tracker.TouchViewports()) { 857 if (!state_tracker.TouchViewports()) {
1199 return; 858 return;
@@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
1206 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), 865 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
1207 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), 866 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
1208 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), 867 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
1209 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; 868 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15),
869 };
1210 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); 870 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
1211} 871}
1212 872
@@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
1214 if (!state_tracker.TouchScissors()) { 874 if (!state_tracker.TouchScissors()) {
1215 return; 875 return;
1216 } 876 }
1217 const std::array scissors = { 877 const std::array scissors{
1218 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), 878 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
1219 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), 879 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
1220 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), 880 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
1221 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), 881 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
1222 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), 882 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
1223 GetScissorState(regs, 15)}; 883 GetScissorState(regs, 15),
884 };
1224 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); 885 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
1225} 886}
1226 887
@@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1385 }); 1046 });
1386} 1047}
1387 1048
1388size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1389 size_t size = CalculateVertexArraysSize();
1390 if (is_indexed) {
1391 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1392 }
1393 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1394 return size;
1395}
1396
1397size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1398 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1399 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1400}
1401
1402size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1403 const auto& regs = maxwell3d.regs;
1404
1405 size_t size = 0;
1406 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1407 // This implementation assumes that all attributes are used in the shader.
1408 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1409 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1410 DEBUG_ASSERT(end >= start);
1411
1412 size += (end - start) * regs.vertex_array[index].enable;
1413 }
1414 return size;
1415}
1416
1417size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1418 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1419 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1420}
1421
1422size_t RasterizerVulkan::CalculateConstBufferSize(
1423 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1424 if (entry.IsIndirect()) {
1425 // Buffer is accessed indirectly, so upload the entire thing
1426 return buffer.size;
1427 } else {
1428 // Buffer is accessed directly, upload just what we use
1429 return entry.GetSize();
1430 }
1431}
1432
1433VkBuffer RasterizerVulkan::DefaultBuffer() {
1434 if (default_buffer) {
1435 return *default_buffer;
1436 }
1437 default_buffer = device.GetLogical().CreateBuffer({
1438 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1439 .pNext = nullptr,
1440 .flags = 0,
1441 .size = DEFAULT_BUFFER_SIZE,
1442 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1443 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
1444 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1445 .queueFamilyIndexCount = 0,
1446 .pQueueFamilyIndices = nullptr,
1447 });
1448 default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
1449
1450 scheduler.RequestOutsideRenderPassOperationContext();
1451 scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
1452 cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
1453 });
1454 return *default_buffer;
1455}
1456
1457} // namespace Vulkan 1049} // namespace Vulkan
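[Editor's note] Among the deletions above, SetupGlobalBuffer documents the descriptor layout the shared buffer cache must now resolve itself: a storage buffer is described inside a const buffer by a 64-bit GPU address at offset 0 followed by a 32-bit size at offset 8. A minimal sketch of that resolution, assuming a gpu_memory object with a templated Read as in the deleted code:

    #include <cstdint>

    // Layout implied by the deleted SetupGlobalBuffer: a storage buffer
    // descriptor inside a const buffer is a u64 GPU address at +0 and a
    // u32 size at +8.
    struct SsboDescriptor {
        std::uint64_t address;
        std::uint32_t size;
    };

    template <typename GpuMemory>
    SsboDescriptor ReadSsboDescriptor(GpuMemory& gpu_memory, std::uint64_t cbuf_addr,
                                      std::uint32_t cbuf_offset) {
        const std::uint64_t desc_addr = cbuf_addr + cbuf_offset;
        return SsboDescriptor{
            .address = gpu_memory.template Read<std::uint64_t>(desc_addr),
            .size = gpu_memory.template Read<std::uint32_t>(desc_addr + 8),
        };
    }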
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 8e261b9bd..7fc6741da 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -18,14 +18,12 @@
18#include "video_core/renderer_vulkan/blit_image.h" 18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 21#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 22#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 23#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
25#include "video_core/renderer_vulkan/vk_query_cache.h" 24#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 25#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 26#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
28#include "video_core/renderer_vulkan/vk_stream_buffer.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 27#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 28#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h" 29#include "video_core/shader/async_shaders.h"
@@ -49,7 +47,6 @@ namespace Vulkan {
49struct VKScreenInfo; 47struct VKScreenInfo;
50 48
51class StateTracker; 49class StateTracker;
52class BufferBindings;
53 50
54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 51class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
55public: 52public:
@@ -65,6 +62,7 @@ public:
65 void DispatchCompute(GPUVAddr code_addr) override; 62 void DispatchCompute(GPUVAddr code_addr) override;
66 void ResetCounter(VideoCore::QueryType type) override; 63 void ResetCounter(VideoCore::QueryType type) override;
67 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 64 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
65 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
68 void FlushAll() override; 66 void FlushAll() override;
69 void FlushRegion(VAddr addr, u64 size) override; 67 void FlushRegion(VAddr addr, u64 size) override;
70 bool MustFlushRegion(VAddr addr, u64 size) override; 68 bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -107,24 +105,11 @@ private:
107 105
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); 106 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109 107
110 struct DrawParameters {
111 void Draw(vk::CommandBuffer cmdbuf) const;
112
113 u32 base_instance = 0;
114 u32 num_instances = 0;
115 u32 base_vertex = 0;
116 u32 num_vertices = 0;
117 bool is_indexed = 0;
118 };
119
120 void FlushWork(); 108 void FlushWork();
121 109
122 /// Setups geometry buffers and state.
123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
124 bool is_indexed, bool is_instanced);
125
126 /// Setup descriptors in the graphics pipeline. 110 /// Setup descriptors in the graphics pipeline.
127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 111 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
112 bool is_indexed);
128 113
129 void UpdateDynamicStates(); 114 void UpdateDynamicStates();
130 115
@@ -132,16 +117,6 @@ private:
132 117
133 void EndTransformFeedback(); 118 void EndTransformFeedback();
134 119
135 void SetupVertexArrays(BufferBindings& buffer_bindings);
136
137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
138
139 /// Setup constant buffers in the graphics pipeline.
140 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
141
142 /// Setup global buffers in the graphics pipeline.
143 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
144
145 /// Setup uniform texels in the graphics pipeline. 120 /// Setup uniform texels in the graphics pipeline.
146 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); 121 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
147 122
@@ -154,12 +129,6 @@ private:
154 /// Setup images in the graphics pipeline. 129 /// Setup images in the graphics pipeline.
155 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); 130 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
156 131
157 /// Setup constant buffers in the compute pipeline.
158 void SetupComputeConstBuffers(const ShaderEntries& entries);
159
160 /// Setup global buffers in the compute pipeline.
161 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
162
163 /// Setup texel buffers in the compute pipeline. 132 /// Setup texel buffers in the compute pipeline.
164 void SetupComputeUniformTexels(const ShaderEntries& entries); 133 void SetupComputeUniformTexels(const ShaderEntries& entries);
165 134
@@ -172,11 +141,6 @@ private:
172 /// Setup images in the compute pipeline. 141 /// Setup images in the compute pipeline.
173 void SetupComputeImages(const ShaderEntries& entries); 142 void SetupComputeImages(const ShaderEntries& entries);
174 143
175 void SetupConstBuffer(const ConstBufferEntry& entry,
176 const Tegra::Engines::ConstBufferInfo& buffer);
177
178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
179
180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 144 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 145 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 146 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,19 +157,6 @@ private:
193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 157 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 158 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
195 159
196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
197
198 size_t CalculateComputeStreamBufferSize() const;
199
200 size_t CalculateVertexArraysSize() const;
201
202 size_t CalculateIndexBufferSize() const;
203
204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
206
207 VkBuffer DefaultBuffer();
208
209 Tegra::GPU& gpu; 160 Tegra::GPU& gpu;
210 Tegra::MemoryManager& gpu_memory; 161 Tegra::MemoryManager& gpu_memory;
211 Tegra::Engines::Maxwell3D& maxwell3d; 162 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -217,24 +168,19 @@ private:
217 StateTracker& state_tracker; 168 StateTracker& state_tracker;
218 VKScheduler& scheduler; 169 VKScheduler& scheduler;
219 170
220 VKStreamBuffer stream_buffer;
221 StagingBufferPool staging_pool; 171 StagingBufferPool staging_pool;
222 VKDescriptorPool descriptor_pool; 172 VKDescriptorPool descriptor_pool;
223 VKUpdateDescriptorQueue update_descriptor_queue; 173 VKUpdateDescriptorQueue update_descriptor_queue;
224 BlitImageHelper blit_image; 174 BlitImageHelper blit_image;
225 QuadArrayPass quad_array_pass;
226 QuadIndexedPass quad_indexed_pass;
227 Uint8Pass uint8_pass;
228 175
229 TextureCacheRuntime texture_cache_runtime; 176 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache; 177 TextureCache texture_cache;
178 BufferCacheRuntime buffer_cache_runtime;
179 BufferCache buffer_cache;
231 VKPipelineCache pipeline_cache; 180 VKPipelineCache pipeline_cache;
232 VKBufferCache buffer_cache;
233 VKQueryCache query_cache; 181 VKQueryCache query_cache;
234 VKFenceManager fence_manager; 182 VKFenceManager fence_manager;
235 183
236 vk::Buffer default_buffer;
237 MemoryCommit default_buffer_commit;
238 vk::Event wfi_event; 184 vk::Event wfi_event;
239 VideoCommon::Shader::AsyncShaders async_shaders; 185 VideoCommon::Shader::AsyncShaders async_shaders;
240 186
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index ee274ac59..a8bf7bda8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -17,21 +17,21 @@ ResourcePool::~ResourcePool() = default;
17size_t ResourcePool::CommitResource() { 17size_t ResourcePool::CommitResource() {
18 // Refresh semaphore to query updated results 18 // Refresh semaphore to query updated results
19 master_semaphore.Refresh(); 19 master_semaphore.Refresh();
20 20 const u64 gpu_tick = master_semaphore.KnownGpuTick();
21 const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> { 21 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
22 for (size_t iterator = begin; iterator < end; ++iterator) { 22 for (size_t iterator = begin; iterator < end; ++iterator) {
23 if (master_semaphore.IsFree(ticks[iterator])) { 23 if (gpu_tick >= ticks[iterator]) {
24 ticks[iterator] = master_semaphore.CurrentTick(); 24 ticks[iterator] = master_semaphore.CurrentTick();
25 return iterator; 25 return iterator;
26 } 26 }
27 } 27 }
28 return {}; 28 return std::nullopt;
29 }; 29 };
30 // Try to find a free resource from the hinted position to the end. 30 // Try to find a free resource from the hinted position to the end.
31 auto found = search(free_iterator, ticks.size()); 31 std::optional<size_t> found = search(hint_iterator, ticks.size());
32 if (!found) { 32 if (!found) {
33 // Search from beginning to the hinted position. 33 // Search from beginning to the hinted position.
34 found = search(0, free_iterator); 34 found = search(0, hint_iterator);
35 if (!found) { 35 if (!found) {
36 // Both searches failed, the pool is full; handle it. 36 // Both searches failed, the pool is full; handle it.
37 const size_t free_resource = ManageOverflow(); 37 const size_t free_resource = ManageOverflow();
@@ -41,7 +41,7 @@ size_t ResourcePool::CommitResource() {
41 } 41 }
42 } 42 }
43 // Hint the next search to the resource after the one that's been committed. 43 // Hint the next search to the resource after the one that's been committed.
44 free_iterator = (*found + 1) % ticks.size(); 44 hint_iterator = (*found + 1) % ticks.size();
45 return *found; 45 return *found;
46} 46}
47 47
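[Editor's note] CommitResource now reads KnownGpuTick once and compares ticks directly instead of calling IsFree per element, and free_iterator is renamed to hint_iterator to match what it actually is: a search hint, not a free-list head. A standalone sketch of the two-phase hinted search (the pool-full overflow path is elided):

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    // Two-phase hinted ring search: scan [hint, end) first, then [0, hint).
    // A slot is free once the GPU tick has reached the tick stored for it.
    std::optional<std::size_t> FindFree(const std::vector<std::uint64_t>& ticks,
                                        std::uint64_t gpu_tick, std::size_t hint) {
        const auto search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
            for (std::size_t i = begin; i < end; ++i) {
                if (gpu_tick >= ticks[i]) {
                    return i;
                }
            }
            return std::nullopt;
        };
        if (const auto found = search(hint, ticks.size())) {
            return found;
        }
        return search(0, hint);
    }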
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index a018c7ec2..9d0bb3b4d 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -36,7 +36,7 @@ private:
36 36
37 MasterSemaphore& master_semaphore; 37 MasterSemaphore& master_semaphore;
38 size_t grow_step = 0; ///< Number of new resources created after an overflow 38 size_t grow_step = 0; ///< Number of new resources created after an overflow
39 size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found 39 size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found
40 std::vector<u64> ticks; ///< Ticks for each resource 40 std::vector<u64> ticks; ///< Ticks for each resource
41}; 41};
42 42
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 66004f9c0..f35c120b0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() {
52 worker_thread.join(); 52 worker_thread.join();
53} 53}
54 54
55u64 VKScheduler::CurrentTick() const noexcept {
56 return master_semaphore->CurrentTick();
57}
58
59bool VKScheduler::IsFree(u64 tick) const noexcept {
60 return master_semaphore->IsFree(tick);
61}
62
63void VKScheduler::Wait(u64 tick) {
64 master_semaphore->Wait(tick);
65}
66
67void VKScheduler::Flush(VkSemaphore semaphore) { 55void VKScheduler::Flush(VkSemaphore semaphore) {
68 SubmitExecution(semaphore); 56 SubmitExecution(semaphore);
69 AllocateNewContext(); 57 AllocateNewContext();
@@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() {
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 257 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | 258 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 259 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, 260 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images)); 261 vk::Span(barriers.data(), num_images));
274 }); 262 });
275 state.renderpass = nullptr; 263 state.renderpass = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 15f2987eb..3ce48e9d2 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/threadsafe_queue.h" 16#include "common/threadsafe_queue.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
@@ -21,7 +22,6 @@ namespace Vulkan {
21class CommandPool; 22class CommandPool;
22class Device; 23class Device;
23class Framebuffer; 24class Framebuffer;
24class MasterSemaphore;
25class StateTracker; 25class StateTracker;
26class VKQueryCache; 26class VKQueryCache;
27 27
@@ -32,15 +32,6 @@ public:
32 explicit VKScheduler(const Device& device, StateTracker& state_tracker); 32 explicit VKScheduler(const Device& device, StateTracker& state_tracker);
33 ~VKScheduler(); 33 ~VKScheduler();
34 34
35 /// Returns the current command buffer tick.
36 [[nodiscard]] u64 CurrentTick() const noexcept;
37
38 /// Returns true when a tick has been triggered by the GPU.
39 [[nodiscard]] bool IsFree(u64 tick) const noexcept;
40
41 /// Waits for the given tick to trigger on the GPU.
42 void Wait(u64 tick);
43
44 /// Sends the current execution context to the GPU. 35 /// Sends the current execution context to the GPU.
45 void Flush(VkSemaphore semaphore = nullptr); 36 void Flush(VkSemaphore semaphore = nullptr);
46 37
@@ -82,6 +73,21 @@ public:
82 (void)chunk->Record(command); 73 (void)chunk->Record(command);
83 } 74 }
84 75
76 /// Returns the current command buffer tick.
77 [[nodiscard]] u64 CurrentTick() const noexcept {
78 return master_semaphore->CurrentTick();
79 }
80
81 /// Returns true when a tick has been triggered by the GPU.
82 [[nodiscard]] bool IsFree(u64 tick) const noexcept {
83 return master_semaphore->IsFree(tick);
84 }
85
86 /// Waits for the given tick to trigger on the GPU.
87 void Wait(u64 tick) {
88 master_semaphore->Wait(tick);
89 }
90
85 /// Returns the master timeline semaphore. 91 /// Returns the master timeline semaphore.
86 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { 92 [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
87 return *master_semaphore; 93 return *master_semaphore;
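[Editor's note] CurrentTick, IsFree and Wait move into the header as inline forwards to the master semaphore, which is why vk_master_semaphore.h is now included directly. A minimal model of how callers use the tick protocol, with a templated Scheduler standing in for VKScheduler:

    #include <cstdint>

    // Minimal model of the tick protocol: CurrentTick() is the value the next
    // submission will signal, IsFree(tick) is true once the GPU has reached
    // it, and Wait(tick) blocks on the timeline semaphore until it does.
    template <typename Scheduler>
    std::uint64_t RecordAndRemember(Scheduler& scheduler) {
        const std::uint64_t tick = scheduler.CurrentTick();
        // ... record commands against the current command buffer here ...
        return tick;
    }

    template <typename Scheduler>
    void WaitIfPending(Scheduler& scheduler, std::uint64_t tick) {
        if (!scheduler.IsFree(tick)) {
            scheduler.Wait(tick);
        }
    }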
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 61d52b961..40e2e0d38 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3106 entries.const_buffers.emplace_back(cbuf.second, cbuf.first); 3106 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
3107 } 3107 }
3108 for (const auto& [base, usage] : ir.GetGlobalMemory()) { 3108 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
3109 entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); 3109 entries.global_buffers.emplace_back(GlobalBufferEntry{
3110 .cbuf_index = base.cbuf_index,
3111 .cbuf_offset = base.cbuf_offset,
3112 .is_written = usage.is_written,
3113 });
3110 } 3114 }
3111 for (const auto& sampler : ir.GetSamplers()) { 3115 for (const auto& sampler : ir.GetSamplers()) {
3112 if (sampler.is_buffer) { 3116 if (sampler.is_buffer) {
@@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3127 entries.attributes.insert(GetGenericAttributeLocation(attribute)); 3131 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3128 } 3132 }
3129 } 3133 }
3134 for (const auto& buffer : entries.const_buffers) {
3135 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3136 }
3130 entries.clip_distances = ir.GetClipDistances(); 3137 entries.clip_distances = ir.GetClipDistances();
3131 entries.shader_length = ir.GetLength(); 3138 entries.shader_length = ir.GetLength();
3132 entries.uses_warps = ir.UsesWarps(); 3139 entries.uses_warps = ir.UsesWarps();
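[Editor's note] enabled_uniform_buffers is a plain u32 bitmask with one bit per const buffer index, built from the const_buffers list as shown above. A small worked example of constructing and querying such a mask (the indices are hypothetical):

    #include <cstdint>
    #include <initializer_list>

    // Const buffers 0, 1 and 3 enabled yields mask 0b1011.
    std::uint32_t MakeMask() {
        std::uint32_t enabled_uniform_buffers = 0;
        for (const std::uint32_t index : {0U, 1U, 3U}) {
            enabled_uniform_buffers |= 1U << index;
        }
        return enabled_uniform_buffers; // == 0b1011
    }

    bool IsEnabled(std::uint32_t mask, std::uint32_t index) {
        return ((mask >> index) & 1U) != 0;
    }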
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 26381e444..5d94132a5 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -39,24 +39,7 @@ private:
39 u32 index{}; 39 u32 index{};
40}; 40};
41 41
42class GlobalBufferEntry { 42struct GlobalBufferEntry {
43public:
44 constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_)
45 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {}
46
47 constexpr u32 GetCbufIndex() const {
48 return cbuf_index;
49 }
50
51 constexpr u32 GetCbufOffset() const {
52 return cbuf_offset;
53 }
54
55 constexpr bool IsWritten() const {
56 return is_written;
57 }
58
59private:
60 u32 cbuf_index{}; 43 u32 cbuf_index{};
61 u32 cbuf_offset{}; 44 u32 cbuf_offset{};
62 bool is_written{}; 45 bool is_written{};
@@ -78,6 +61,7 @@ struct ShaderEntries {
78 std::set<u32> attributes; 61 std::set<u32> attributes;
79 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
80 std::size_t shader_length{}; 63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
81 bool uses_warps{}; 65 bool uses_warps{};
82}; 66};
83 67
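[Editor's note] GlobalBufferEntry loses its constructor and getters and becomes an aggregate; that is what permits the designated-initializer emplace_back in the vk_shader_decompiler.cpp hunk above. A small illustration (the field values here are hypothetical):

    // Aggregate with defaulted members; C++20 designated initializers now
    // work, matching the emplace_back call in GenerateShaderEntries.
    struct GlobalBufferEntry {
        unsigned cbuf_index{};
        unsigned cbuf_offset{};
        bool is_written{};
    };

    GlobalBufferEntry MakeEntry() {
        return GlobalBufferEntry{
            .cbuf_index = 4,      // hypothetical const buffer slot
            .cbuf_offset = 0x120, // hypothetical offset of the descriptor
            .is_written = true,
        };
    }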
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..7a1232497 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
8 8
9#include <fmt/format.h> 9#include <fmt/format.h>
10 10
11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
12#include "common/bit_util.h" 13#include "common/bit_util.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
@@ -17,18 +18,119 @@
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
21namespace {
22// Maximum potential alignment of a Vulkan buffer
23constexpr VkDeviceSize MAX_ALIGNMENT = 256;
24// Maximum size to put elements in the stream buffer
25constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
26// Stream buffer size in bytes
27constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
28constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
29
30constexpr VkMemoryPropertyFlags HOST_FLAGS =
31 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
32constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
33
34bool IsStreamHeap(VkMemoryHeap heap) noexcept {
35 return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
36}
37
38std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
39 VkMemoryPropertyFlags flags) noexcept {
40 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
41 if (((type_mask >> type_index) & 1) == 0) {
42 // Memory type is incompatible
43 continue;
44 }
45 const VkMemoryType& memory_type = props.memoryTypes[type_index];
46 if ((memory_type.propertyFlags & flags) != flags) {
47 // Memory type doesn't have the flags we want
48 continue;
49 }
50 if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
51 // Memory heap is not suitable for streaming
52 continue;
53 }
54 // Success!
55 return type_index;
56 }
57 return std::nullopt;
58}
59
60u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
61 // Try to find a DEVICE_LOCAL_BIT type; Nvidia and AMD have a dedicated heap for this
62 std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
63 if (type) {
64 return *type;
65 }
66 // Otherwise try without the DEVICE_LOCAL_BIT
67 type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
68 if (type) {
69 return *type;
70 }
71 // This should never happen, and in case it does, signal it as an out of memory situation
72 throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
73}
74
75size_t Region(size_t iterator) noexcept {
76 return iterator / REGION_SIZE;
77}
78} // Anonymous namespace
20 79
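[Editor's note] With STREAM_BUFFER_SIZE = 128 MiB and NUM_SYNCS = 16, REGION_SIZE is 8 MiB, so Region() maps a byte offset in the stream buffer to one of 16 synchronization slots. A compile-time check of that arithmetic:

    #include <cstddef>

    constexpr std::size_t NUM_SYNCS = 16;
    constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
    constexpr std::size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; // 8 MiB

    constexpr std::size_t Region(std::size_t offset) {
        return offset / REGION_SIZE;
    }

    // The first and last byte of a region map to the same slot, and the last
    // byte of the buffer maps to the last of the 16 slots.
    static_assert(Region(0) == 0);
    static_assert(Region(REGION_SIZE - 1) == 0);
    static_assert(Region(REGION_SIZE) == 1);
    static_assert(Region(STREAM_BUFFER_SIZE - 1) == NUM_SYNCS - 1);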
21StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, 80StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
22 VKScheduler& scheduler_) 81 VKScheduler& scheduler_)
23 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} 82 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
83 const vk::Device& dev = device.GetLogical();
84 stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
85 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
86 .pNext = nullptr,
87 .flags = 0,
88 .size = STREAM_BUFFER_SIZE,
89 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
90 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
91 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
92 .queueFamilyIndexCount = 0,
93 .pQueueFamilyIndices = nullptr,
94 });
95 if (device.HasDebuggingToolAttached()) {
96 stream_buffer.SetObjectNameEXT("Stream Buffer");
97 }
98 VkMemoryDedicatedRequirements dedicated_reqs{
99 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
100 .pNext = nullptr,
101 .prefersDedicatedAllocation = VK_FALSE,
102 .requiresDedicatedAllocation = VK_FALSE,
103 };
104 const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
105 const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
106 dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
107 const VkMemoryDedicatedAllocateInfo dedicated_info{
108 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
109 .pNext = nullptr,
110 .image = nullptr,
111 .buffer = *stream_buffer,
112 };
113 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
114 stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
115 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
116 .pNext = make_dedicated ? &dedicated_info : nullptr,
117 .allocationSize = requirements.size,
118 .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
119 });
120 if (device.HasDebuggingToolAttached()) {
121 stream_memory.SetObjectNameEXT("Stream Buffer Memory");
122 }
123 stream_buffer.BindMemory(*stream_memory, 0);
124 stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
125}
24 126
25StagingBufferPool::~StagingBufferPool() = default; 127StagingBufferPool::~StagingBufferPool() = default;
26 128
27StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { 129StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
28 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { 130 if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
29 return *ref; 131 return GetStreamBuffer(size);
30 } 132 }
31 return CreateStagingBuffer(size, usage); 133 return GetStagingBuffer(size, usage);
32} 134}
33 135
34void StagingBufferPool::TickFrame() { 136void StagingBufferPool::TickFrame() {
@@ -39,6 +141,52 @@ void StagingBufferPool::TickFrame() {
39 ReleaseCache(MemoryUsage::Download); 141 ReleaseCache(MemoryUsage::Download);
40} 142}
41 143
144StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
145 if (AreRegionsActive(Region(free_iterator) + 1,
146 std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
147 // Avoid waiting for the previous usages to be free
148 return GetStagingBuffer(size, MemoryUsage::Upload);
149 }
150 const u64 current_tick = scheduler.CurrentTick();
151 std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
152 current_tick);
153 used_iterator = iterator;
154 free_iterator = std::max(free_iterator, iterator + size);
155
156 if (iterator + size >= STREAM_BUFFER_SIZE) {
157 std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
158 current_tick);
159 used_iterator = 0;
160 iterator = 0;
161 free_iterator = size;
162
163 if (AreRegionsActive(0, Region(size) + 1)) {
164 // Avoid waiting for the previous usages to be free
165 return GetStagingBuffer(size, MemoryUsage::Upload);
166 }
167 }
168 const size_t offset = iterator;
169 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
170 return StagingBufferRef{
171 .buffer = *stream_buffer,
172 .offset = static_cast<VkDeviceSize>(offset),
173 .mapped_span = std::span<u8>(stream_pointer + offset, size),
174 };
175}
176
177bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
178 const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
179 return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
180 [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; });
181}
182
183StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) {
184 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
185 return *ref;
186 }
187 return CreateStagingBuffer(size, usage);
188}
189
42std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, 190std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
43 MemoryUsage usage) { 191 MemoryUsage usage) {
44 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; 192 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
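[Editor's note] GetStreamBuffer is a ring allocator over the 128 MiB stream buffer: iterator is the bump pointer, used_iterator marks regions whose sync tick still needs recording, and free_iterator tracks the furthest byte handed out since the last wrap; when the regions an allocation would overlap are still in flight, the request falls back to a plain staging buffer instead of stalling. A condensed model of just the bump-and-wrap step (the in-flight region checks are left to the caller):

    #include <cstddef>

    // Condensed model of the stream buffer bump allocator: returns the offset
    // of the allocation and advances 'iterator', wrapping to 0 when the
    // request would run past the end of the buffer.
    struct RingState {
        std::size_t iterator = 0; // next free byte (bump pointer)
    };

    constexpr std::size_t BUFFER_SIZE = 128 * 1024 * 1024;
    constexpr std::size_t ALIGNMENT = 256; // MAX_ALIGNMENT in the real code

    std::size_t AlignUp(std::size_t value, std::size_t align) {
        return (value + align - 1) / align * align;
    }

    std::size_t Allocate(RingState& state, std::size_t size) {
        if (state.iterator + size >= BUFFER_SIZE) {
            state.iterator = 0; // wrap; the real code re-checks region ticks here
        }
        const std::size_t offset = state.iterator;
        state.iterator = AlignUp(state.iterator + size, ALIGNMENT);
        return offset;
    }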
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..69f7618de 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;
19 19
20struct StagingBufferRef { 20struct StagingBufferRef {
21 VkBuffer buffer; 21 VkBuffer buffer;
22 VkDeviceSize offset;
22 std::span<u8> mapped_span; 23 std::span<u8> mapped_span;
23}; 24};
24 25
25class StagingBufferPool { 26class StagingBufferPool {
26public: 27public:
28 static constexpr size_t NUM_SYNCS = 16;
29
27 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, 30 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
28 VKScheduler& scheduler); 31 VKScheduler& scheduler);
29 ~StagingBufferPool(); 32 ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
33 void TickFrame(); 36 void TickFrame();
34 37
35private: 38private:
39 struct StreamBufferCommit {
40 size_t upper_bound;
41 u64 tick;
42 };
43
36 struct StagingBuffer { 44 struct StagingBuffer {
37 vk::Buffer buffer; 45 vk::Buffer buffer;
38 MemoryCommit commit; 46 MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
42 StagingBufferRef Ref() const noexcept { 50 StagingBufferRef Ref() const noexcept {
43 return { 51 return {
44 .buffer = *buffer, 52 .buffer = *buffer,
53 .offset = 0,
45 .mapped_span = mapped_span, 54 .mapped_span = mapped_span,
46 }; 55 };
47 } 56 }
@@ -56,6 +65,12 @@ private:
56 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; 65 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
57 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; 66 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
58 67
68 StagingBufferRef GetStreamBuffer(size_t size);
69
70 bool AreRegionsActive(size_t region_begin, size_t region_end) const;
71
72 StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage);
73
59 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); 74 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
60 75
61 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); 76 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +85,15 @@ private:
70 MemoryAllocator& memory_allocator; 85 MemoryAllocator& memory_allocator;
71 VKScheduler& scheduler; 86 VKScheduler& scheduler;
72 87
88 vk::Buffer stream_buffer;
89 vk::DeviceMemory stream_memory;
90 u8* stream_pointer = nullptr;
91
92 size_t iterator = 0;
93 size_t used_iterator = 0;
94 size_t free_iterator = 0;
95 std::array<u64, NUM_SYNCS> sync_ticks{};
96
73 StagingBuffersCache device_local_cache; 97 StagingBuffersCache device_local_cache;
74 StagingBuffersCache upload_cache; 98 StagingBuffersCache upload_cache;
75 StagingBuffersCache download_cache; 99 StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 1779a2e30..e81fad007 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table;
30using Flags = Maxwell3D::DirtyState::Flags; 30using Flags = Maxwell3D::DirtyState::Flags;
31 31
32Flags MakeInvalidationFlags() { 32Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{ 33 static constexpr int INVALIDATION_FLAGS[]{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, 34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, 35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, 36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers,
37 }; 37 };
38 Flags flags{}; 38 Flags flags{};
39 for (const int flag : INVALIDATION_FLAGS) { 39 for (const int flag : INVALIDATION_FLAGS) {
40 flags[flag] = true; 40 flags[flag] = true;
41 } 41 }
42 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
43 flags[index] = true;
44 }
42 return flags; 45 return flags;
43} 46}
44 47
@@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
130StateTracker::StateTracker(Tegra::GPU& gpu) 133StateTracker::StateTracker(Tegra::GPU& gpu)
131 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 134 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
132 auto& tables = gpu.Maxwell3D().dirty.tables; 135 auto& tables = gpu.Maxwell3D().dirty.tables;
133 SetupDirtyRenderTargets(tables); 136 SetupDirtyFlags(tables);
134 SetupDirtyViewports(tables); 137 SetupDirtyViewports(tables);
135 SetupDirtyScissors(tables); 138 SetupDirtyScissors(tables);
136 SetupDirtyDepthBias(tables); 139 SetupDirtyDepthBias(tables);
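The per-element vertex buffer flags occupy a contiguous index range, so they are set with a loop instead of bloating the array literal. A sketch of the presumed consumer (not part of this hunk): when command buffer state is lost, the precomputed mask re-dirties every piece of dynamic state in a single OR:

    // Sketch, assuming the usual tracker shape: `flags` aliases Maxwell3D's
    // dirty bitset, so one OR marks Viewports through VertexBuffer31 for
    // re-submission on the next draw.
    void StateTracker::InvalidateCommandBufferState() {
        flags |= invalidation_flags;
    }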
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 725a2a05d..0b63bd6c8 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
56 56
57} // Anonymous namespace 57} // Anonymous namespace
58 58
59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) 59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_,
60 : surface{surface_}, device{device_}, scheduler{scheduler_} {} 60 u32 width, u32 height, bool srgb)
61 : surface{surface_}, device{device_}, scheduler{scheduler_} {
62 Create(width, height, srgb);
63}
61 64
62VKSwapchain::~VKSwapchain() = default; 65VKSwapchain::~VKSwapchain() = default;
63 66
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 2eadd62b3..a728511e0 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -20,7 +20,8 @@ class VKScheduler;
20 20
21class VKSwapchain { 21class VKSwapchain {
22public: 22public:
23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); 23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler,
24 u32 width, u32 height, bool srgb);
24 ~VKSwapchain(); 25 ~VKSwapchain();
25 26
26 /// Creates (or recreates) the swapchain with a given size. 27 /// Creates (or recreates) the swapchain with a given size.
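Folding the initial Create() into the constructor removes the two-phase initialization; a hypothetical call site, with width, height, and is_srgb supplied by the frontend:

    // The swapchain is usable immediately after construction.
    VKSwapchain swapchain(surface, device, scheduler, width, height, is_srgb);
    // Create() remains the entry point for explicit recreation, e.g. on a
    // window resize or an sRGB toggle.
    swapchain.Create(new_width, new_height, is_srgb);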
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index aa7c5d7c6..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, 426void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
427 VkImageAspectFlags aspect_mask, bool is_initialized, 427 VkImageAspectFlags aspect_mask, bool is_initialized,
428 std::span<const VkBufferImageCopy> copies) { 428 std::span<const VkBufferImageCopy> copies) {
429 static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | 429 static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
430 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 430 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 431 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
432 static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
433 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
434 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
432 const VkImageMemoryBarrier read_barrier{ 435 const VkImageMemoryBarrier read_barrier{
433 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 436 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
434 .pNext = nullptr, 437 .pNext = nullptr,
435 .srcAccessMask = ACCESS_FLAGS, 438 .srcAccessMask = WRITE_ACCESS_FLAGS,
436 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 439 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
437 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, 440 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
438 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 441 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
439 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 442 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
440 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 443 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
441 .image = image, 444 .image = image,
442 .subresourceRange = 445 .subresourceRange{
443 { 446 .aspectMask = aspect_mask,
444 .aspectMask = aspect_mask, 447 .baseMipLevel = 0,
445 .baseMipLevel = 0, 448 .levelCount = VK_REMAINING_MIP_LEVELS,
446 .levelCount = VK_REMAINING_MIP_LEVELS, 449 .baseArrayLayer = 0,
447 .baseArrayLayer = 0, 450 .layerCount = VK_REMAINING_ARRAY_LAYERS,
448 .layerCount = VK_REMAINING_ARRAY_LAYERS, 451 },
449 },
450 }; 452 };
451 const VkImageMemoryBarrier write_barrier{ 453 const VkImageMemoryBarrier write_barrier{
452 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 454 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
453 .pNext = nullptr, 455 .pNext = nullptr,
454 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 456 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
455 .dstAccessMask = ACCESS_FLAGS, 457 .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
456 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 458 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
457 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 459 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
458 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 460 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
459 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 461 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
460 .image = image, 462 .image = image,
461 .subresourceRange = 463 .subresourceRange{
462 { 464 .aspectMask = aspect_mask,
463 .aspectMask = aspect_mask, 465 .baseMipLevel = 0,
464 .baseMipLevel = 0, 466 .levelCount = VK_REMAINING_MIP_LEVELS,
465 .levelCount = VK_REMAINING_MIP_LEVELS, 467 .baseArrayLayer = 0,
466 .baseArrayLayer = 0, 468 .layerCount = VK_REMAINING_ARRAY_LAYERS,
467 .layerCount = VK_REMAINING_ARRAY_LAYERS, 469 },
468 },
469 }; 470 };
470 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 471 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
471 read_barrier); 472 read_barrier);
@@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() {
569 scheduler.Finish(); 570 scheduler.Finish();
570} 571}
571 572
572ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { 573StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
573 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); 574 return staging_buffer_pool.Request(size, MemoryUsage::Upload);
574 return {
575 .handle = staging_ref.buffer,
576 .span = staging_ref.mapped_span,
577 };
578} 575}
579 576
580ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { 577StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
581 const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); 578 return staging_buffer_pool.Request(size, MemoryUsage::Download);
582 return {
583 .handle = staging_ref.buffer,
584 .span = staging_ref.mapped_span,
585 };
586} 579}
587 580
588void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 581void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
@@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
754 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 747 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
755 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 748 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
756 VK_ACCESS_TRANSFER_WRITE_BIT, 749 VK_ACCESS_TRANSFER_WRITE_BIT,
757 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 750 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
758 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 751 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
759 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 752 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
760 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 753 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
765 VkImageMemoryBarrier{ 758 VkImageMemoryBarrier{
766 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 759 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
767 .pNext = nullptr, 760 .pNext = nullptr,
768 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | 761 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
769 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
770 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
771 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
772 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | 762 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
773 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, 763 VK_ACCESS_TRANSFER_WRITE_BIT,
774 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 764 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
775 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 765 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
776 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 766 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
@@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
828 } 818 }
829} 819}
830 820
831void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 821void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
832 std::span<const BufferImageCopy> copies) {
833 // TODO: Move this to another API 822 // TODO: Move this to another API
834 scheduler->RequestOutsideRenderPassOperationContext(); 823 scheduler->RequestOutsideRenderPassOperationContext();
835 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 824 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
836 const VkBuffer src_buffer = map.handle; 825 const VkBuffer src_buffer = map.buffer;
837 const VkImage vk_image = *image; 826 const VkImage vk_image = *image;
838 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 827 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
839 const bool is_initialized = std::exchange(initialized, true); 828 const bool is_initialized = std::exchange(initialized, true);
@@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
843 }); 832 });
844} 833}
845 834
846void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 835void Image::UploadMemory(const StagingBufferRef& map,
847 std::span<const VideoCommon::BufferCopy> copies) { 836 std::span<const VideoCommon::BufferCopy> copies) {
848 // TODO: Move this to another API 837 // TODO: Move this to another API
849 scheduler->RequestOutsideRenderPassOperationContext(); 838 scheduler->RequestOutsideRenderPassOperationContext();
850 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); 839 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
851 const VkBuffer src_buffer = map.handle; 840 const VkBuffer src_buffer = map.buffer;
852 const VkBuffer dst_buffer = *buffer; 841 const VkBuffer dst_buffer = *buffer;
853 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { 842 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
854 // TODO: Barriers 843 // TODO: Barriers
@@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
856 }); 845 });
857} 846}
858 847
859void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 848void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
860 std::span<const BufferImageCopy> copies) { 849 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
861 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 850 scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
862 scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
863 vk_copies](vk::CommandBuffer cmdbuf) { 851 vk_copies](vk::CommandBuffer cmdbuf) {
864 // TODO: Barriers 852 const VkImageMemoryBarrier read_barrier{
865 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); 853 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
854 .pNext = nullptr,
855 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
856 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
857 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
858 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
859 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
860 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
861 .image = image,
862 .subresourceRange{
863 .aspectMask = aspect_mask,
864 .baseMipLevel = 0,
865 .levelCount = VK_REMAINING_MIP_LEVELS,
866 .baseArrayLayer = 0,
867 .layerCount = VK_REMAINING_ARRAY_LAYERS,
868 },
869 };
870 const VkImageMemoryBarrier image_write_barrier{
871 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
872 .pNext = nullptr,
873 .srcAccessMask = 0,
874 .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
875 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
876 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
877 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
878 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
879 .image = image,
880 .subresourceRange{
881 .aspectMask = aspect_mask,
882 .baseMipLevel = 0,
883 .levelCount = VK_REMAINING_MIP_LEVELS,
884 .baseArrayLayer = 0,
885 .layerCount = VK_REMAINING_ARRAY_LAYERS,
886 },
887 };
888 const VkMemoryBarrier memory_write_barrier{
889 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
890 .pNext = nullptr,
891 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
892 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
893 };
894 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
895 0, read_barrier);
896 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
897 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
898 0, memory_write_barrier, nullptr, image_write_barrier);
866 }); 899 });
867} 900}
868 901
@@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1127 .pAttachments = attachments.data(), 1160 .pAttachments = attachments.data(),
1128 .width = key.size.width, 1161 .width = key.size.width,
1129 .height = key.size.height, 1162 .height = key.size.height,
1130 .layers = static_cast<u32>(num_layers), 1163 .layers = static_cast<u32>(std::max(num_layers, 1)),
1131 }); 1164 });
1132 if (runtime.device.HasDebuggingToolAttached()) { 1165 if (runtime.device.HasDebuggingToolAttached()) {
1133 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); 1166 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 8d29361a1..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
7#include <compare> 7#include <compare>
8#include <span> 8#include <span>
9 9
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
10#include "video_core/texture_cache/texture_cache.h" 11#include "video_core/texture_cache/texture_cache.h"
11#include "video_core/vulkan_common/vulkan_memory_allocator.h" 12#include "video_core/vulkan_common/vulkan_memory_allocator.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> {
53 54
54namespace Vulkan { 55namespace Vulkan {
55 56
56struct ImageBufferMap {
57 [[nodiscard]] VkBuffer Handle() const noexcept {
58 return handle;
59 }
60
61 [[nodiscard]] std::span<u8> Span() const noexcept {
62 return span;
63 }
64
65 VkBuffer handle;
66 std::span<u8> span;
67};
68
69struct TextureCacheRuntime { 57struct TextureCacheRuntime {
70 const Device& device; 58 const Device& device;
71 VKScheduler& scheduler; 59 VKScheduler& scheduler;
@@ -76,9 +64,9 @@ struct TextureCacheRuntime {
76 64
77 void Finish(); 65 void Finish();
78 66
79 [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); 67 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
80 68
81 [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); 69 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
82 70
83 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 71 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
84 const std::array<Offset2D, 2>& dst_region, 72 const std::array<Offset2D, 2>& dst_region,
@@ -94,7 +82,7 @@ struct TextureCacheRuntime {
94 return false; 82 return false;
95 } 83 }
96 84
97 void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, 85 void AccelerateImageUpload(Image&, const StagingBufferRef&,
98 std::span<const VideoCommon::SwizzleParameters>) { 86 std::span<const VideoCommon::SwizzleParameters>) {
99 UNREACHABLE(); 87 UNREACHABLE();
100 } 88 }
@@ -112,13 +100,12 @@ public:
112 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 100 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
113 VAddr cpu_addr); 101 VAddr cpu_addr);
114 102
115 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 103 void UploadMemory(const StagingBufferRef& map,
116 std::span<const VideoCommon::BufferImageCopy> copies); 104 std::span<const VideoCommon::BufferImageCopy> copies);
117 105
118 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 106 void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
119 std::span<const VideoCommon::BufferCopy> copies);
120 107
121 void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, 108 void DownloadMemory(const StagingBufferRef& map,
122 std::span<const VideoCommon::BufferImageCopy> copies); 109 std::span<const VideoCommon::BufferImageCopy> copies);
123 110
124 [[nodiscard]] VkImage Handle() const noexcept { 111 [[nodiscard]] VkImage Handle() const noexcept {
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 3b40db9bc..02adcf9c7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -64,6 +64,7 @@ void AsyncShaders::FreeWorkers() {
64 64
65void AsyncShaders::KillWorkers() { 65void AsyncShaders::KillWorkers() {
66 is_thread_exiting.store(true); 66 is_thread_exiting.store(true);
67 cv.notify_all();
67 for (auto& thread : worker_threads) { 68 for (auto& thread : worker_threads) {
68 thread.detach(); 69 thread.detach();
69 } 70 }
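The added notify_all() matters because workers park on the condition variable and only re-check the exit flag when woken; without it, KillWorkers() could detach threads that never wake. A sketch of the loop shape this fix assumes (the loop itself is not part of this hunk):

    void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
        while (!is_thread_exiting.load(std::memory_order_relaxed)) {
            std::unique_lock lock{queue_mutex};
            // Woken either by queued work or by KillWorkers() flipping the flag.
            cv.wait(lock, [this] { return is_thread_exiting || HasWorkQueued(); });
            // ... pop one work item and compile ...
        }
    }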
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 0dbb1a31f..7fdff6e56 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -9,16 +9,7 @@
9#include <shared_mutex> 9#include <shared_mutex>
10#include <thread> 10#include <thread>
11 11
12// This header includes both Vulkan and OpenGL headers, this has to be fixed
13// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
14// Forcefully include glad early and undefine macros
15#include <glad/glad.h> 12#include <glad/glad.h>
16#ifdef CreateEvent
17#undef CreateEvent
18#endif
19#ifdef CreateSemaphore
20#undef CreateSemaphore
21#endif
22 13
23#include "common/common_types.h" 14#include "common/common_types.h"
24#include "video_core/renderer_opengl/gl_device.h" 15#include "video_core/renderer_opengl/gl_device.h"
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d3ea07aac..5f88537bc 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
76 case SystemVariable::InvocationId: 76 case SystemVariable::InvocationId:
77 return Operation(OperationCode::InvocationId); 77 return Operation(OperationCode::InvocationId);
78 case SystemVariable::Ydirection: 78 case SystemVariable::Ydirection:
79 uses_y_negate = true;
79 return Operation(OperationCode::YNegate); 80 return Operation(OperationCode::YNegate);
80 case SystemVariable::InvocationInfo: 81 case SystemVariable::InvocationInfo:
81 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); 82 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0c6ab0f07..1cd7c14d7 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -139,6 +139,10 @@ public:
139 return uses_legacy_varyings; 139 return uses_legacy_varyings;
140 } 140 }
141 141
142 bool UsesYNegate() const {
143 return uses_y_negate;
144 }
145
142 bool UsesWarps() const { 146 bool UsesWarps() const {
143 return uses_warps; 147 return uses_warps;
144 } 148 }
@@ -465,6 +469,7 @@ private:
465 bool uses_instance_id{}; 469 bool uses_instance_id{};
466 bool uses_vertex_id{}; 470 bool uses_vertex_id{};
467 bool uses_legacy_varyings{}; 471 bool uses_legacy_varyings{};
472 bool uses_y_negate{};
468 bool uses_warps{}; 473 bool uses_warps{};
469 bool uses_indexed_samplers{}; 474 bool uses_indexed_samplers{};
470 475
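A hypothetical consumer of the new query; the register and field names below are assumptions, the point being that backends can now gate Y-flip handling on actual use of S2R Ydirection instead of emitting it unconditionally:

    if (ir.UsesYNegate()) {
        // Assumed register layout; feeds the constant behind OperationCode::YNegate.
        specialization.y_negate = maxwell3d.regs.screen_y_control.y_negate != 0;
    }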
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d1080300f..b1da69971 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -103,9 +103,6 @@ public:
103 /// Notify the cache that a new frame has been queued 103 /// Notify the cache that a new frame has been queued
104 void TickFrame(); 104 void TickFrame();
105 105
106 /// Return an unique mutually exclusive lock for the cache
107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
108
109 /// Return a constant reference to the given image view id 106 /// Return a constant reference to the given image view id
110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; 107 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
111 108
@@ -179,6 +176,8 @@ public:
179 /// Return true when a CPU region is modified from the GPU 176 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 177 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
181 178
179 std::mutex mutex;
180
182private: 181private:
183 /// Iterate over all page indices in a range 182 /// Iterate over all page indices in a range
184 template <typename Func> 183 template <typename Func>
@@ -212,8 +211,8 @@ private:
212 void RefreshContents(Image& image); 211 void RefreshContents(Image& image);
213 212
214 /// Upload data from guest to an image 213 /// Upload data from guest to an image
215 template <typename MapBuffer> 214 template <typename StagingBuffer>
216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); 215 void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
217 216
218 /// Find or create an image view from a guest descriptor 217 /// Find or create an image view from a guest descriptor
219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); 218 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -325,8 +324,6 @@ private:
325 324
326 RenderTargets render_targets; 325 RenderTargets render_targets;
327 326
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views; 327 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers; 328 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 329 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
@@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() {
386} 383}
387 384
388template <class P> 385template <class P>
389std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
390 return std::unique_lock{mutex};
391}
392
393template <class P>
394const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { 386const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
395 return slot_image_views[id]; 387 return slot_image_views[id];
396} 388}
@@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
598 }); 590 });
599 for (const ImageId image_id : images) { 591 for (const ImageId image_id : images) {
600 Image& image = slot_images[image_id]; 592 Image& image = slot_images[image_id];
601 auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); 593 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
602 const auto copies = FullDownloadCopies(image.info); 594 const auto copies = FullDownloadCopies(image.info);
603 image.DownloadMemory(map, 0, copies); 595 image.DownloadMemory(map, copies);
604 runtime.Finish(); 596 runtime.Finish();
605 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); 597 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
606 } 598 }
607} 599}
608 600
@@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() {
757 for (const ImageId image_id : download_ids) { 749 for (const ImageId image_id : download_ids) {
758 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 750 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
759 } 751 }
760 auto download_map = runtime.MapDownloadBuffer(total_size_bytes); 752 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
761 size_t buffer_offset = 0; 753 const size_t original_offset = download_map.offset;
762 for (const ImageId image_id : download_ids) { 754 for (const ImageId image_id : download_ids) {
763 Image& image = slot_images[image_id]; 755 Image& image = slot_images[image_id];
764 const auto copies = FullDownloadCopies(image.info); 756 const auto copies = FullDownloadCopies(image.info);
765 image.DownloadMemory(download_map, buffer_offset, copies); 757 image.DownloadMemory(download_map, copies);
766 buffer_offset += image.unswizzled_size_bytes; 758 download_map.offset += image.unswizzled_size_bytes;
767 } 759 }
768 // Wait for downloads to finish 760 // Wait for downloads to finish
769 runtime.Finish(); 761 runtime.Finish();
770 762
771 buffer_offset = 0; 763 download_map.offset = original_offset;
772 const std::span<u8> download_span = download_map.Span(); 764 std::span<u8> download_span = download_map.mapped_span;
773 for (const ImageId image_id : download_ids) { 765 for (const ImageId image_id : download_ids) {
774 const ImageBase& image = slot_images[image_id]; 766 const ImageBase& image = slot_images[image_id];
775 const auto copies = FullDownloadCopies(image.info); 767 const auto copies = FullDownloadCopies(image.info);
776 const std::span<u8> image_download_span = download_span.subspan(buffer_offset); 768 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
777 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); 769 download_map.offset += image.unswizzled_size_bytes;
778 buffer_offset += image.unswizzled_size_bytes; 770 download_span = download_span.subspan(image.unswizzled_size_bytes);
779 } 771 }
780 committed_downloads.pop(); 772 committed_downloads.pop();
781} 773}
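Both passes now reuse the staging reference itself as the cursor: in the first pass, offset positions each image's copy commands inside the shared download buffer; in the second, the mapped span is narrowed in lock step for the CPU-side swizzle. The second pass, restated without diff markers:

    download_map.offset = original_offset;
    std::span<u8> download_span = download_map.mapped_span;
    for (const ImageId image_id : download_ids) {
        const ImageBase& image = slot_images[image_id];
        const auto copies = FullDownloadCopies(image.info);
        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
        download_map.offset += image.unswizzled_size_bytes;
        download_span = download_span.subspan(image.unswizzled_size_bytes);
    }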
@@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) {
806 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 798 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
807 return; 799 return;
808 } 800 }
809 auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); 801 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
810 UploadImageContents(image, map, 0); 802 UploadImageContents(image, staging);
811 runtime.InsertUploadMemoryBarrier(); 803 runtime.InsertUploadMemoryBarrier();
812} 804}
813 805
814template <class P> 806template <class P>
815template <typename MapBuffer> 807template <typename StagingBuffer>
816void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { 808void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
817 const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); 809 const std::span<u8> mapped_span = staging.mapped_span;
818 const GPUVAddr gpu_addr = image.gpu_addr; 810 const GPUVAddr gpu_addr = image.gpu_addr;
819 811
820 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { 812 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
821 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); 813 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
822 const auto uploads = FullUploadSwizzles(image.info); 814 const auto uploads = FullUploadSwizzles(image.info);
823 runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); 815 runtime.AccelerateImageUpload(image, staging, uploads);
824 } else if (True(image.flags & ImageFlagBits::Converted)) { 816 } else if (True(image.flags & ImageFlagBits::Converted)) {
825 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); 817 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
826 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 818 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
827 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 819 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
828 image.UploadMemory(map, buffer_offset, copies); 820 image.UploadMemory(staging, copies);
829 } else if (image.info.type == ImageType::Buffer) { 821 } else if (image.info.type == ImageType::Buffer) {
830 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; 822 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
831 image.UploadMemory(map, buffer_offset, copies); 823 image.UploadMemory(staging, copies);
832 } else { 824 } else {
833 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 825 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
834 image.UploadMemory(map, buffer_offset, copies); 826 image.UploadMemory(staging, copies);
835 } 827 }
836} 828}
837 829
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index bb2cdef81..a0bc1f7b6 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -169,40 +169,6 @@ template <u32 GOB_EXTENT>
169 return Common::DivCeil(AdjustMipSize(size, level), block_size); 169 return Common::DivCeil(AdjustMipSize(size, level), block_size);
170} 170}
171 171
172[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
173 return config.Width() * config.Height() * BytesPerBlock(format);
174}
175
176[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
177 switch (type) {
178 case TextureType::Texture2D:
179 case TextureType::Texture2DArray:
180 case TextureType::Texture2DNoMipmap:
181 case TextureType::Texture3D:
182 case TextureType::TextureCubeArray:
183 case TextureType::TextureCubemap:
184 return true;
185 case TextureType::Texture1D:
186 case TextureType::Texture1DArray:
187 case TextureType::Texture1DBuffer:
188 return false;
189 }
190 return false;
191}
192
193[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
194 switch (type) {
195 case ImageType::e2D:
196 case ImageType::e3D:
197 case ImageType::Linear:
198 return true;
199 case ImageType::e1D:
200 case ImageType::Buffer:
201 return false;
202 }
203 UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
204}
205
206[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { 172[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
207 switch (num_samples) { 173 switch (num_samples) {
208 case 1: 174 case 1:
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 53444e945..e1b38c6ac 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -38,19 +38,18 @@ namespace VideoCore {
38 38
39std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { 39std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
40 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); 40 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
41 std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( 41 const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
42 system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); 42 auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
43
44 auto context = emu_window.CreateSharedContext(); 43 auto context = emu_window.CreateSharedContext();
45 const auto scope = context->Acquire(); 44 auto scope = context->Acquire();
46 45 try {
47 auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); 46 auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
48 if (!renderer->Init()) { 47 gpu->BindRenderer(std::move(renderer));
48 return gpu;
49 } catch (const std::runtime_error& exception) {
50 LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
49 return nullptr; 51 return nullptr;
50 } 52 }
51
52 gpu->BindRenderer(std::move(renderer));
53 return gpu;
54} 53}
55 54
56u16 GetResolutionScaleFactor(const RendererBase& renderer) { 55u16 GetResolutionScaleFactor(const RendererBase& renderer) {
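Renderer initialization failures are now reported by throwing rather than by a bool-returning Init(). A hedged sketch of what this implies on the renderer side (hypothetical body; the only contract CreateGPU relies on is that failures surface as std::runtime_error or a subclass):

    RendererVulkan::RendererVulkan(/* ... */) : RendererBase{emu_window} {
        try {
            // Create instance, surface, device, swapchain; these may throw
            // backend-specific exceptions such as vk::Exception.
        } catch (const std::exception& exception) {
            // Normalize for the catch in CreateGPU above.
            throw std::runtime_error{exception.what()};
        }
    }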
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 51f53bc39..34d396434 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -18,27 +18,22 @@
18#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
19 19
20namespace Vulkan { 20namespace Vulkan {
21
22namespace { 21namespace {
23
24namespace Alternatives { 22namespace Alternatives {
25 23constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{
26constexpr std::array Depth24UnormS8_UINT{
27 VK_FORMAT_D32_SFLOAT_S8_UINT, 24 VK_FORMAT_D32_SFLOAT_S8_UINT,
28 VK_FORMAT_D16_UNORM_S8_UINT, 25 VK_FORMAT_D16_UNORM_S8_UINT,
29 VkFormat{}, 26 VK_FORMAT_UNDEFINED,
30}; 27};
31 28
32constexpr std::array Depth16UnormS8_UINT{ 29constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
33 VK_FORMAT_D24_UNORM_S8_UINT, 30 VK_FORMAT_D24_UNORM_S8_UINT,
34 VK_FORMAT_D32_SFLOAT_S8_UINT, 31 VK_FORMAT_D32_SFLOAT_S8_UINT,
35 VkFormat{}, 32 VK_FORMAT_UNDEFINED,
36}; 33};
37
38} // namespace Alternatives 34} // namespace Alternatives
39 35
40constexpr std::array REQUIRED_EXTENSIONS{ 36constexpr std::array REQUIRED_EXTENSIONS{
41 VK_KHR_SWAPCHAIN_EXTENSION_NAME,
42 VK_KHR_MAINTENANCE1_EXTENSION_NAME, 37 VK_KHR_MAINTENANCE1_EXTENSION_NAME,
43 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, 38 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
44 VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, 39 VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
@@ -51,7 +46,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
51 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 46 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
52 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 47 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
53 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 48 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
49 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
54 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, 50 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
51#ifdef _WIN32
52 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
53#endif
54#ifdef __linux__
55 VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
56#endif
55}; 57};
56 58
57template <typename T> 59template <typename T>
@@ -63,9 +65,9 @@ void SetNext(void**& next, T& data) {
63constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { 65constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
64 switch (format) { 66 switch (format) {
65 case VK_FORMAT_D24_UNORM_S8_UINT: 67 case VK_FORMAT_D24_UNORM_S8_UINT:
66 return Alternatives::Depth24UnormS8_UINT.data(); 68 return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
67 case VK_FORMAT_D16_UNORM_S8_UINT: 69 case VK_FORMAT_D16_UNORM_S8_UINT:
68 return Alternatives::Depth16UnormS8_UINT.data(); 70 return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
69 default: 71 default:
70 return nullptr; 72 return nullptr;
71 } 73 }
@@ -195,78 +197,77 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
195 const vk::InstanceDispatch& dld_) 197 const vk::InstanceDispatch& dld_)
196 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 198 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
197 format_properties{GetFormatProperties(physical)} { 199 format_properties{GetFormatProperties(physical)} {
198 CheckSuitability(); 200 CheckSuitability(surface != nullptr);
199 SetupFamilies(surface); 201 SetupFamilies(surface);
200 SetupFeatures(); 202 SetupFeatures();
201 203
202 const auto queue_cis = GetDeviceQueueCreateInfos(); 204 const auto queue_cis = GetDeviceQueueCreateInfos();
203 const std::vector extensions = LoadExtensions(); 205 const std::vector extensions = LoadExtensions(surface != nullptr);
204 206
205 VkPhysicalDeviceFeatures2 features2{ 207 VkPhysicalDeviceFeatures2 features2{
206 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, 208 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
207 .pNext = nullptr, 209 .pNext = nullptr,
208 .features{}, 210 .features{
211 .robustBufferAccess = true,
212 .fullDrawIndexUint32 = false,
213 .imageCubeArray = true,
214 .independentBlend = true,
215 .geometryShader = true,
216 .tessellationShader = true,
217 .sampleRateShading = false,
218 .dualSrcBlend = false,
219 .logicOp = false,
220 .multiDrawIndirect = false,
221 .drawIndirectFirstInstance = false,
222 .depthClamp = true,
223 .depthBiasClamp = true,
224 .fillModeNonSolid = false,
225 .depthBounds = false,
226 .wideLines = false,
227 .largePoints = true,
228 .alphaToOne = false,
229 .multiViewport = true,
230 .samplerAnisotropy = true,
231 .textureCompressionETC2 = false,
232 .textureCompressionASTC_LDR = is_optimal_astc_supported,
233 .textureCompressionBC = false,
234 .occlusionQueryPrecise = true,
235 .pipelineStatisticsQuery = false,
236 .vertexPipelineStoresAndAtomics = true,
237 .fragmentStoresAndAtomics = true,
238 .shaderTessellationAndGeometryPointSize = false,
239 .shaderImageGatherExtended = true,
240 .shaderStorageImageExtendedFormats = false,
241 .shaderStorageImageMultisample = is_shader_storage_image_multisample,
242 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
243 .shaderStorageImageWriteWithoutFormat = true,
244 .shaderUniformBufferArrayDynamicIndexing = false,
245 .shaderSampledImageArrayDynamicIndexing = false,
246 .shaderStorageBufferArrayDynamicIndexing = false,
247 .shaderStorageImageArrayDynamicIndexing = false,
248 .shaderClipDistance = false,
249 .shaderCullDistance = false,
250 .shaderFloat64 = false,
251 .shaderInt64 = false,
252 .shaderInt16 = false,
253 .shaderResourceResidency = false,
254 .shaderResourceMinLod = false,
255 .sparseBinding = false,
256 .sparseResidencyBuffer = false,
257 .sparseResidencyImage2D = false,
258 .sparseResidencyImage3D = false,
259 .sparseResidency2Samples = false,
260 .sparseResidency4Samples = false,
261 .sparseResidency8Samples = false,
262 .sparseResidency16Samples = false,
263 .sparseResidencyAliased = false,
264 .variableMultisampleRate = false,
265 .inheritedQueries = false,
266 },
209 }; 267 };
210 const void* first_next = &features2; 268 const void* first_next = &features2;
211 void** next = &features2.pNext; 269 void** next = &features2.pNext;
212 270
213 features2.features = {
214 .robustBufferAccess = false,
215 .fullDrawIndexUint32 = false,
216 .imageCubeArray = true,
217 .independentBlend = true,
218 .geometryShader = true,
219 .tessellationShader = true,
220 .sampleRateShading = false,
221 .dualSrcBlend = false,
222 .logicOp = false,
223 .multiDrawIndirect = false,
224 .drawIndirectFirstInstance = false,
225 .depthClamp = true,
226 .depthBiasClamp = true,
227 .fillModeNonSolid = false,
228 .depthBounds = false,
229 .wideLines = false,
230 .largePoints = true,
231 .alphaToOne = false,
232 .multiViewport = true,
233 .samplerAnisotropy = true,
234 .textureCompressionETC2 = false,
235 .textureCompressionASTC_LDR = is_optimal_astc_supported,
236 .textureCompressionBC = false,
237 .occlusionQueryPrecise = true,
238 .pipelineStatisticsQuery = false,
239 .vertexPipelineStoresAndAtomics = true,
240 .fragmentStoresAndAtomics = true,
241 .shaderTessellationAndGeometryPointSize = false,
242 .shaderImageGatherExtended = true,
243 .shaderStorageImageExtendedFormats = false,
244 .shaderStorageImageMultisample = is_shader_storage_image_multisample,
245 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
246 .shaderStorageImageWriteWithoutFormat = true,
247 .shaderUniformBufferArrayDynamicIndexing = false,
248 .shaderSampledImageArrayDynamicIndexing = false,
249 .shaderStorageBufferArrayDynamicIndexing = false,
250 .shaderStorageImageArrayDynamicIndexing = false,
251 .shaderClipDistance = false,
252 .shaderCullDistance = false,
253 .shaderFloat64 = false,
254 .shaderInt64 = false,
255 .shaderInt16 = false,
256 .shaderResourceResidency = false,
257 .shaderResourceMinLod = false,
258 .sparseBinding = false,
259 .sparseResidencyBuffer = false,
260 .sparseResidencyImage2D = false,
261 .sparseResidencyImage3D = false,
262 .sparseResidency2Samples = false,
263 .sparseResidency4Samples = false,
264 .sparseResidency8Samples = false,
265 .sparseResidency16Samples = false,
266 .sparseResidencyAliased = false,
267 .variableMultisampleRate = false,
268 .inheritedQueries = false,
269 };
270 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ 271 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
271 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, 272 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
272 .pNext = nullptr, 273 .pNext = nullptr,
@@ -379,20 +380,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
379 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 380 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
380 } 381 }
381 382
382 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
383 if (ext_robustness2) {
384 robustness2 = {
385 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
386 .pNext = nullptr,
387 .robustBufferAccess2 = false,
388 .robustImageAccess2 = true,
389 .nullDescriptor = true,
390 };
391 SetNext(next, robustness2);
392 } else {
393 LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
394 }
395
396 if (!ext_depth_range_unrestricted) { 383 if (!ext_depth_range_unrestricted) {
397 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 384 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
398 } 385 }
@@ -535,16 +522,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
535 return (supported_usage & wanted_usage) == wanted_usage; 522 return (supported_usage & wanted_usage) == wanted_usage;
536} 523}
537 524
538void Device::CheckSuitability() const { 525void Device::CheckSuitability(bool requires_swapchain) const {
539 std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; 526 std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
527 bool has_swapchain = false;
540 for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { 528 for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
541 for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { 529 const std::string_view name{property.extensionName};
530 for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
542 if (available_extensions[i]) { 531 if (available_extensions[i]) {
543 continue; 532 continue;
544 } 533 }
545 const std::string_view name{property.extensionName};
546 available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; 534 available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
547 } 535 }
536 has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME;
548 } 537 }
549 for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { 538 for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
550 if (available_extensions[i]) { 539 if (available_extensions[i]) {
@@ -553,6 +542,11 @@ void Device::CheckSuitability() const {
553 LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); 542 LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
554 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); 543 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
555 } 544 }
545 if (requires_swapchain && !has_swapchain) {
546 LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain");
547 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
548 }
549
556 struct LimitTuple { 550 struct LimitTuple {
557 u32 minimum; 551 u32 minimum;
558 u32 value; 552 u32 value;
@@ -572,9 +566,20 @@ void Device::CheckSuitability() const {
572 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 566 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
573 } 567 }
574 } 568 }
575 const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; 569 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
570 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
571
572 VkPhysicalDeviceFeatures2 features2{};
573 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
574 features2.pNext = &robustness2;
575
576 physical.GetFeatures2KHR(features2);
577
578 const VkPhysicalDeviceFeatures& features{features2.features};
576 const std::array feature_report{ 579 const std::array feature_report{
580 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
577 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 581 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
582 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
578 std::make_pair(features.imageCubeArray, "imageCubeArray"), 583 std::make_pair(features.imageCubeArray, "imageCubeArray"),
579 std::make_pair(features.independentBlend, "independentBlend"), 584 std::make_pair(features.independentBlend, "independentBlend"),
580 std::make_pair(features.depthClamp, "depthClamp"), 585 std::make_pair(features.depthClamp, "depthClamp"),
@@ -589,6 +594,9 @@ void Device::CheckSuitability() const {
589 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 594 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
590 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 595 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
591 "shaderStorageImageWriteWithoutFormat"), 596 "shaderStorageImageWriteWithoutFormat"),
597 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
598 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
599 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
592 }; 600 };
593 for (const auto& [is_supported, name] : feature_report) { 601 for (const auto& [is_supported, name] : feature_report) {
594 if (is_supported) { 602 if (is_supported) {
@@ -599,17 +607,19 @@ void Device::CheckSuitability() const {
599 } 607 }
600} 608}
601 609
602std::vector<const char*> Device::LoadExtensions() { 610std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
603 std::vector<const char*> extensions; 611 std::vector<const char*> extensions;
604 extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); 612 extensions.reserve(8 + REQUIRED_EXTENSIONS.size());
605 extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); 613 extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
614 if (requires_surface) {
615 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
616 }
606 617
607 bool has_khr_shader_float16_int8{}; 618 bool has_khr_shader_float16_int8{};
608 bool has_ext_subgroup_size_control{}; 619 bool has_ext_subgroup_size_control{};
609 bool has_ext_transform_feedback{}; 620 bool has_ext_transform_feedback{};
610 bool has_ext_custom_border_color{}; 621 bool has_ext_custom_border_color{};
611 bool has_ext_extended_dynamic_state{}; 622 bool has_ext_extended_dynamic_state{};
612 bool has_ext_robustness2{};
613 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 623 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
614 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 624 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
615 bool push) { 625 bool push) {
@@ -637,14 +647,12 @@ std::vector<const char*> Device::LoadExtensions() {
637 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); 647 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
638 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 648 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
639 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 649 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
640 test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
641 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 650 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
642 if (Settings::values.renderer_debug) { 651 if (Settings::values.renderer_debug) {
643 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, 652 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
644 true); 653 true);
645 } 654 }
646 } 655 }
647
648 VkPhysicalDeviceFeatures2KHR features; 656 VkPhysicalDeviceFeatures2KHR features;
649 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; 657 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
650 658
@@ -661,7 +669,6 @@ std::vector<const char*> Device::LoadExtensions() {
661 is_float16_supported = float16_int8_features.shaderFloat16; 669 is_float16_supported = float16_int8_features.shaderFloat16;
662 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 670 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
663 } 671 }
664
665 if (has_ext_subgroup_size_control) { 672 if (has_ext_subgroup_size_control) {
666 VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; 673 VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
667 subgroup_features.sType = 674 subgroup_features.sType =
@@ -688,7 +695,6 @@ std::vector<const char*> Device::LoadExtensions() {
688 } else { 695 } else {
689 is_warp_potentially_bigger = true; 696 is_warp_potentially_bigger = true;
690 } 697 }
691
692 if (has_ext_transform_feedback) { 698 if (has_ext_transform_feedback) {
693 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; 699 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
694 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; 700 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -710,7 +716,6 @@ std::vector<const char*> Device::LoadExtensions() {
710 ext_transform_feedback = true; 716 ext_transform_feedback = true;
711 } 717 }
712 } 718 }
713
714 if (has_ext_custom_border_color) { 719 if (has_ext_custom_border_color) {
715 VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; 720 VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
716 border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; 721 border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
@@ -723,7 +728,6 @@ std::vector<const char*> Device::LoadExtensions() {
723 ext_custom_border_color = true; 728 ext_custom_border_color = true;
724 } 729 }
725 } 730 }
726
727 if (has_ext_extended_dynamic_state) { 731 if (has_ext_extended_dynamic_state) {
728 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; 732 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
729 dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; 733 dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
@@ -736,19 +740,6 @@ std::vector<const char*> Device::LoadExtensions() {
736 ext_extended_dynamic_state = true; 740 ext_extended_dynamic_state = true;
737 } 741 }
738 } 742 }
739
740 if (has_ext_robustness2) {
741 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
742 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
743 robustness2.pNext = nullptr;
744 features.pNext = &robustness2;
745 physical.GetFeatures2KHR(features);
746 if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
747 extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
748 ext_robustness2 = true;
749 }
750 }
751
752 return extensions; 743 return extensions;
753} 744}
754 745
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4b66dba7a..67d70cd22 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer };
23const u32 GuestWarpSize = 32; 23const u32 GuestWarpSize = 32;
24 24
25/// Handles data specific to a physical device. 25/// Handles data specific to a physical device.
26class Device final { 26class Device {
27public: 27public:
28 explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, 28 explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
29 const vk::InstanceDispatch& dld); 29 const vk::InstanceDispatch& dld);
@@ -227,10 +227,10 @@ public:
227 227
228private: 228private:
229 /// Checks if the physical device is suitable. 229 /// Checks if the physical device is suitable.
230 void CheckSuitability() const; 230 void CheckSuitability(bool requires_swapchain) const;
231 231
232 /// Loads extensions into a vector and stores available ones in this object. 232 /// Loads extensions into a vector and stores available ones in this object.
233 std::vector<const char*> LoadExtensions(); 233 std::vector<const char*> LoadExtensions(bool requires_surface);
234 234
235 /// Sets up queue families. 235 /// Sets up queue families.
236 void SetupFamilies(VkSurfaceKHR surface); 236 void SetupFamilies(VkSurfaceKHR surface);
@@ -285,7 +285,6 @@ private:
285 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 285 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
286 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 286 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
287 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 287 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
288 bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
289 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 288 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
290 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 289 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
291 bool has_renderdoc{}; ///< Has RenderDoc attached 290 bool has_renderdoc{}; ///< Has RenderDoc attached
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 889ecda0c..bfd6e6add 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <future>
6#include <optional> 7#include <optional>
7#include <span> 8#include <span>
8#include <utility> 9#include <utility>
@@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
140 VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); 141 VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
141 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); 142 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
142 } 143 }
143 vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); 144 vk::Instance instance =
145 std::async([&] {
146 return vk::Instance::Create(required_version, layers, extensions, dld);
147 }).get();
144 if (!vk::Load(*instance, dld)) { 148 if (!vk::Load(*instance, dld)) {
145 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); 149 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
146 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); 150 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
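
The instance-creation change above routes vk::Instance::Create through std::async and blocks on the result immediately, so the call stays synchronous while the work can run off the calling thread. A sketch of the pattern with a stand-in workload; note that the default launch policy (used above) also permits deferred execution, in which case .get() runs the task on the calling thread, whereas std::launch::async guarantees a worker thread:

    #include <future>

    // Stand-in for vk::Instance::Create(...): the task runs under std::async
    // and .get() blocks until it completes, so the overall call is synchronous.
    int RunOffThread() {
        return std::async(std::launch::async, [] {
            return 42; // placeholder workload
        }).get();
    }
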
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index d6eb3af31..2a8b7a907 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -7,6 +7,8 @@
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9 9
10#include <glad/glad.h>
11
10#include "common/alignment.h" 12#include "common/alignment.h"
11#include "common/assert.h" 13#include "common/assert.h"
12#include "common/common_types.h" 14#include "common/common_types.h"
@@ -55,10 +57,24 @@ struct Range {
55 57
56class MemoryAllocation { 58class MemoryAllocation {
57public: 59public:
58 explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_, 60 explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties,
59 VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) 61 u64 allocation_size_, u32 type)
60 : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, 62 : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties},
61 property_flags{properties}, shifted_memory_type{1U << type} {} 63 shifted_memory_type{1U << type} {}
64
65#if defined(_WIN32) || defined(__linux__)
66 ~MemoryAllocation() {
67 if (owning_opengl_handle != 0) {
68 glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
69 }
70 }
71#endif
72
73 MemoryAllocation& operator=(const MemoryAllocation&) = delete;
74 MemoryAllocation(const MemoryAllocation&) = delete;
75
76 MemoryAllocation& operator=(MemoryAllocation&&) = delete;
77 MemoryAllocation(MemoryAllocation&&) = delete;
62 78
63 [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { 79 [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
64 const std::optional<u64> alloc = FindFreeRegion(size, alignment); 80 const std::optional<u64> alloc = FindFreeRegion(size, alignment);
@@ -88,6 +104,31 @@ public:
88 return memory_mapped_span; 104 return memory_mapped_span;
89 } 105 }
90 106
107#ifdef _WIN32
108 [[nodiscard]] u32 ExportOpenGLHandle() {
109 if (!owning_opengl_handle) {
110 glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
111 glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
112 GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
113 memory.GetMemoryWin32HandleKHR());
114 }
115 return owning_opengl_handle;
116 }
117#elif __linux__
118 [[nodiscard]] u32 ExportOpenGLHandle() {
119 if (!owning_opengl_handle) {
120 glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
121 glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
122 memory.GetMemoryFdKHR());
123 }
124 return owning_opengl_handle;
125 }
126#else
127 [[nodiscard]] u32 ExportOpenGLHandle() {
128 return 0;
129 }
130#endif
131
91 /// Returns whether this allocation is compatible with the arguments. 132 /// Returns whether this allocation is compatible with the arguments.
92 [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { 133 [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
93 return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; 134 return (flags & property_flags) && (type_mask & shifted_memory_type) != 0;
@@ -118,13 +159,15 @@ private:
118 return candidate; 159 return candidate;
119 } 160 }
120 161
121 const Device& device; ///< Vulkan device.
122 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. 162 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
123 const u64 allocation_size; ///< Size of this allocation. 163 const u64 allocation_size; ///< Size of this allocation.
124 const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. 164 const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
125 const u32 shifted_memory_type; ///< Shifted Vulkan memory type. 165 const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
126 std::vector<Range> commits; ///< All commit ranges done from this allocation. 166 std::vector<Range> commits; ///< All commit ranges done from this allocation.
127 std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. 167 std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
168#if defined(_WIN32) || defined(__linux__)
169 u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
170#endif
128}; 171};
129 172
130MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, 173MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() {
156 return span; 199 return span;
157} 200}
158 201
202u32 MemoryCommit::ExportOpenGLHandle() const {
203 return allocation->ExportOpenGLHandle();
204}
205
159void MemoryCommit::Release() { 206void MemoryCommit::Release() {
160 if (allocation) { 207 if (allocation) {
161 allocation->Free(begin); 208 allocation->Free(begin);
162 } 209 }
163} 210}
164 211
165MemoryAllocator::MemoryAllocator(const Device& device_) 212MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
166 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} 213 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
214 export_allocations{export_allocations_} {}
167 215
168MemoryAllocator::~MemoryAllocator() = default; 216MemoryAllocator::~MemoryAllocator() = default;
169 217
@@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage)
196 244
197void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { 245void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
198 const u32 type = FindType(flags, type_mask).value(); 246 const u32 type = FindType(flags, type_mask).value();
247 const VkExportMemoryAllocateInfo export_allocate_info{
248 .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
249 .pNext = nullptr,
250#ifdef _WIN32
251 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
252#elif __linux__
253 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
254#else
255 .handleTypes = 0,
256#endif
257 };
199 vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ 258 vk::DeviceMemory memory = device.GetLogical().AllocateMemory({
200 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 259 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
201 .pNext = nullptr, 260 .pNext = export_allocations ? &export_allocate_info : nullptr,
202 .allocationSize = size, 261 .allocationSize = size,
203 .memoryTypeIndex = type, 262 .memoryTypeIndex = type,
204 }); 263 });
205 allocations.push_back( 264 allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type));
206 std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type));
207} 265}
208 266
209std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, 267std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
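
The allocator changes above form the Vulkan half of a Vulkan-to-OpenGL memory interop: when export_allocations is set, every VkDeviceMemory is allocated with VkExportMemoryAllocateInfo, and ExportOpenGLHandle() imports it into a GL memory object (opaque fd on Linux, opaque Win32 handle on Windows). For context, a hedged sketch of the consuming side, assuming a current GL context with GL_EXT_memory_object and hypothetical `size`/`offset` values describing a commit's range:

    #include <glad/glad.h>

    // Sketch, not the project's code: back a GL buffer with memory exported by
    // a MemoryAllocation above. `memory_object` is the handle returned by
    // ExportOpenGLHandle(); `size` and `offset` (the commit's range inside the
    // allocation) are assumed inputs.
    GLuint MakeInteropBuffer(GLuint memory_object, GLsizeiptr size, GLuint64 offset) {
        GLuint buffer = 0;
        glCreateBuffers(1, &buffer);
        glNamedBufferStorageMemEXT(buffer, size, memory_object, offset);
        return buffer;
    }
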
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index 9e6cfabf9..d1ce29450 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -43,6 +43,9 @@ public:
43 /// It will map the backing allocation if it hasn't been mapped before. 43 /// It will map the backing allocation if it hasn't been mapped before.
44 std::span<u8> Map(); 44 std::span<u8> Map();
45 45
46 /// Returns a non-owning OpenGL handle, creating one if it doesn't exist.
47 u32 ExportOpenGLHandle() const;
48
46 /// Returns the Vulkan memory handler. 49 /// Returns the Vulkan memory handler.
47 VkDeviceMemory Memory() const { 50 VkDeviceMemory Memory() const {
48 return memory; 51 return memory;
@@ -67,7 +70,15 @@ private:
67/// Allocates and releases memory allocations on demand. 70/// Allocates and releases memory allocations on demand.
68class MemoryAllocator { 71class MemoryAllocator {
69public: 72public:
70 explicit MemoryAllocator(const Device& device_); 73 /**
74 * Constructs the memory allocator.
75 *
76 * @param device_ Device to allocate from
77 * @param export_allocations_ True when allocations have to be exported
78 *
79 * @throw vk::Exception on failure
80 */
81 explicit MemoryAllocator(const Device& device_, bool export_allocations_);
71 ~MemoryAllocator(); 82 ~MemoryAllocator();
72 83
73 MemoryAllocator& operator=(const MemoryAllocator&) = delete; 84 MemoryAllocator& operator=(const MemoryAllocator&) = delete;
@@ -106,8 +117,9 @@ private:
106 /// Returns index to the fastest memory type compatible with the passed requirements. 117 /// Returns index to the fastest memory type compatible with the passed requirements.
107 std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; 118 std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
108 119
109 const Device& device; ///< Device handle. 120 const Device& device; ///< Device handle.
110 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. 121 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
122 const bool export_allocations; ///< True when memory allocations have to be exported.
111 std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. 123 std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
112}; 124};
113 125
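
To make the new flag's intent concrete, a pair of hypothetical call sites (names assumed):

    // An OpenGL-backed renderer asks for exportable allocations so they can be
    // imported as GL memory objects; a Vulkan-only path skips the export cost.
    MemoryAllocator interop_allocator(device, /*export_allocations_=*/true);
    MemoryAllocator local_allocator(device, /*export_allocations_=*/false);
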
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 5e15ad607..2aa0ffbe6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
168 X(vkFreeCommandBuffers); 168 X(vkFreeCommandBuffers);
169 X(vkFreeDescriptorSets); 169 X(vkFreeDescriptorSets);
170 X(vkFreeMemory); 170 X(vkFreeMemory);
171 X(vkGetBufferMemoryRequirements); 171 X(vkGetBufferMemoryRequirements2);
172 X(vkGetDeviceQueue); 172 X(vkGetDeviceQueue);
173 X(vkGetEventStatus); 173 X(vkGetEventStatus);
174 X(vkGetFenceStatus); 174 X(vkGetFenceStatus);
175 X(vkGetImageMemoryRequirements); 175 X(vkGetImageMemoryRequirements);
176 X(vkGetMemoryFdKHR);
177#ifdef _WIN32
178 X(vkGetMemoryWin32HandleKHR);
179#endif
176 X(vkGetQueryPoolResults); 180 X(vkGetQueryPoolResults);
177 X(vkGetSemaphoreCounterValueKHR); 181 X(vkGetSemaphoreCounterValueKHR);
178 X(vkMapMemory); 182 X(vkMapMemory);
@@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const {
505 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); 509 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
506} 510}
507 511
512int DeviceMemory::GetMemoryFdKHR() const {
513 const VkMemoryGetFdInfoKHR get_fd_info{
514 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
515 .pNext = nullptr,
516 .memory = handle,
517 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
518 };
519 int fd;
520 Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd));
521 return fd;
522}
523
524#ifdef _WIN32
525HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const {
526 const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{
527 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
528 .pNext = nullptr,
529 .memory = handle,
530 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
531 };
532 HANDLE win32_handle;
533 Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle));
534 return win32_handle;
535}
536#endif
537
508void DeviceMemory::SetObjectNameEXT(const char* name) const { 538void DeviceMemory::SetObjectNameEXT(const char* name) const {
509 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); 539 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
510} 540}
@@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
756 return DeviceMemory(memory, handle, *dld); 786 return DeviceMemory(memory, handle, *dld);
757} 787}
758 788
759VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { 789VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
760 VkMemoryRequirements requirements; 790 void* pnext) const noexcept {
761 dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); 791 const VkBufferMemoryRequirementsInfo2 info{
762 return requirements; 792 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
793 .pNext = nullptr,
794 .buffer = buffer,
795 };
796 VkMemoryRequirements2 requirements{
797 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
798 .pNext = pnext,
799 .memoryRequirements{},
800 };
801 dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
802 return requirements.memoryRequirements;
763} 803}
764 804
765VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { 805VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
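
The widened GetBufferMemoryRequirements lets callers chain output structs through the query. A sketch using VkMemoryDedicatedRequirements (`device` is a vk::Device and `buffer` a VkBuffer, both assumed):

    VkMemoryDedicatedRequirements dedicated{};
    dedicated.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;

    const VkMemoryRequirements reqs = device.GetBufferMemoryRequirements(buffer, &dedicated);
    // reqs.size and reqs.alignment drive the allocation as before; `dedicated`
    // additionally reports whether the buffer wants its own VkDeviceMemory.
    if (dedicated.prefersDedicatedAllocation) {
        // Allocate dedicated memory instead of suballocating.
    }
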
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 9689de0cb..3e36d356a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -15,8 +15,19 @@
15#include <vector> 15#include <vector>
16 16
17#define VK_NO_PROTOTYPES 17#define VK_NO_PROTOTYPES
18#ifdef _WIN32
19#define VK_USE_PLATFORM_WIN32_KHR
20#endif
18#include <vulkan/vulkan.h> 21#include <vulkan/vulkan.h>
19 22
23// Sanitize macros
24#ifdef CreateEvent
25#undef CreateEvent
26#endif
27#ifdef CreateSemaphore
28#undef CreateSemaphore
29#endif
30
20#include "common/common_types.h" 31#include "common/common_types.h"
21 32
22#ifdef _MSC_VER 33#ifdef _MSC_VER
@@ -174,7 +185,7 @@ struct InstanceDispatch {
174}; 185};
175 186
176/// Table holding Vulkan device function pointers. 187/// Table holding Vulkan device function pointers.
177struct DeviceDispatch : public InstanceDispatch { 188struct DeviceDispatch : InstanceDispatch {
178 PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{}; 189 PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{};
179 PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{}; 190 PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{};
180 PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{}; 191 PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{};
@@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch {
272 PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; 283 PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
273 PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; 284 PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
274 PFN_vkFreeMemory vkFreeMemory{}; 285 PFN_vkFreeMemory vkFreeMemory{};
275 PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; 286 PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
276 PFN_vkGetDeviceQueue vkGetDeviceQueue{}; 287 PFN_vkGetDeviceQueue vkGetDeviceQueue{};
277 PFN_vkGetEventStatus vkGetEventStatus{}; 288 PFN_vkGetEventStatus vkGetEventStatus{};
278 PFN_vkGetFenceStatus vkGetFenceStatus{}; 289 PFN_vkGetFenceStatus vkGetFenceStatus{};
279 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; 290 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
291 PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
292#ifdef _WIN32
293 PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
294#endif
280 PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; 295 PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
281 PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; 296 PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{};
282 PFN_vkMapMemory vkMapMemory{}; 297 PFN_vkMapMemory vkMapMemory{};
@@ -344,6 +359,9 @@ public:
344 /// Construct an empty handle. 359 /// Construct an empty handle.
345 Handle() = default; 360 Handle() = default;
346 361
362 /// Construct an empty handle from a null pointer literal.
363 Handle(std::nullptr_t) {}
364
347 /// Copying Vulkan objects is not supported and will never be. 365 /// Copying Vulkan objects is not supported and will never be.
348 Handle(const Handle&) = delete; 366 Handle(const Handle&) = delete;
349 Handle& operator=(const Handle&) = delete; 367 Handle& operator=(const Handle&) = delete;
@@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
659 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; 677 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
660 678
661public: 679public:
680 int GetMemoryFdKHR() const;
681
682#ifdef _WIN32
683 HANDLE GetMemoryWin32HandleKHR() const;
684#endif
685
662 /// Set object name. 686 /// Set object name.
663 void SetObjectNameEXT(const char* name) const; 687 void SetObjectNameEXT(const char* name) const;
664 688
@@ -847,7 +871,8 @@ public:
847 871
848 DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; 872 DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
849 873
850 VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; 874 VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
875 void* pnext = nullptr) const noexcept;
851 876
852 VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; 877 VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
853 878
@@ -1033,6 +1058,12 @@ public:
1033 1058
1034 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, 1059 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1035 VkDependencyFlags dependency_flags, 1060 VkDependencyFlags dependency_flags,
1061 const VkMemoryBarrier& memory_barrier) const noexcept {
1062 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {});
1063 }
1064
1065 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1066 VkDependencyFlags dependency_flags,
1036 const VkBufferMemoryBarrier& buffer_barrier) const noexcept { 1067 const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
1037 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); 1068 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
1038 } 1069 }
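
The new overload accepts a single VkMemoryBarrier directly. A usage sketch (`cmdbuf` is an assumed vk::CommandBuffer) making transfer writes visible to later shader reads:

    const VkMemoryBarrier barrier{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
    };
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
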
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index fb9967c8f..b025ced1c 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -151,6 +151,7 @@ add_executable(yuzu
151 util/util.h 151 util/util.h
152 compatdb.cpp 152 compatdb.cpp
153 compatdb.h 153 compatdb.h
154 yuzu.qrc
154 yuzu.rc 155 yuzu.rc
155) 156)
156 157
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index ffdf34a4a..1c61d419d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -64,7 +64,7 @@ void EmuThread::run() {
64 64
65 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); 65 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
66 66
67 system.Renderer().Rasterizer().LoadDiskResources( 67 system.Renderer().ReadRasterizer()->LoadDiskResources(
68 system.CurrentProcess()->GetTitleID(), stop_run, 68 system.CurrentProcess()->GetTitleID(), stop_run,
69 [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { 69 [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
70 emit LoadProgress(stage, value, total); 70 emit LoadProgress(stage, value, total);
@@ -405,12 +405,17 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
405 if (event->source() == Qt::MouseEventSynthesizedBySystem) { 405 if (event->source() == Qt::MouseEventSynthesizedBySystem) {
406 return; 406 return;
407 } 407 }
408
409 auto pos = event->pos(); 408 auto pos = event->pos();
410 const auto [x, y] = ScaleTouch(pos); 409 const auto [x, y] = ScaleTouch(pos);
411 input_subsystem->GetMouse()->MouseMove(x, y); 410 const int center_x = width() / 2;
411 const int center_y = height() / 2;
412 input_subsystem->GetMouse()->MouseMove(x, y, center_x, center_y);
412 this->TouchMoved(x, y, 0); 413 this->TouchMoved(x, y, 0);
413 414
415 if (Settings::values.mouse_panning) {
416 QCursor::setPos(mapToGlobal({center_x, center_y}));
417 }
418
414 emit MouseActivity(); 419 emit MouseActivity();
415} 420}
416 421
@@ -714,6 +719,11 @@ void GRenderWindow::showEvent(QShowEvent* event) {
714 719
715bool GRenderWindow::eventFilter(QObject* object, QEvent* event) { 720bool GRenderWindow::eventFilter(QObject* object, QEvent* event) {
716 if (event->type() == QEvent::HoverMove) { 721 if (event->type() == QEvent::HoverMove) {
722 if (Settings::values.mouse_panning) {
723 auto* hover_event = static_cast<QMouseEvent*>(event);
724 mouseMoveEvent(hover_event);
725 return false;
726 }
717 emit MouseActivity(); 727 emit MouseActivity();
718 } 728 }
719 return false; 729 return false;
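
The mouse-panning flow above reduces to: measure each move against the window center, forward the delta, then snap the OS cursor back so the next event is again relative to the center. A sketch with a hypothetical helper (not the project's API):

    #include <QCursor>
    #include <QMouseEvent>
    #include <QWidget>

    // Hypothetical helper: forward a center-relative, sensitivity-scaled delta
    // and re-center the cursor for the next event.
    template <typename Sink>
    void ForwardPanningDelta(QWidget& window, const QMouseEvent& event,
                             float sensitivity, Sink&& sink) {
        const QPoint center{window.width() / 2, window.height() / 2};
        const QPoint delta = event.pos() - center;
        sink(delta.x() * sensitivity, delta.y() * sensitivity);
        QCursor::setPos(window.mapToGlobal(center));
    }
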
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8d85a1986..0635d13d0 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -220,7 +220,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
220// This must be in alphabetical order according to action name as it must have the same order as 220// This must be in alphabetical order according to action name as it must have the same order as
221// UISetting::values.shortcuts, which is alphabetically ordered. 221// UISetting::values.shortcuts, which is alphabetically ordered.
222// clang-format off 222// clang-format off
223const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{ 223const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{
224 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}}, 224 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}},
225 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, 225 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
226 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, 226 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -235,6 +235,7 @@ const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
235 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, 235 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
236 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, 236 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
237 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, 237 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
238 {QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("F9"), Qt::ApplicationShortcut}},
238 {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}}, 239 {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}},
239 {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}}, 240 {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}},
240}}; 241}};
@@ -507,6 +508,9 @@ void Config::ReadControlValues() {
507 508
508 Settings::values.emulate_analog_keyboard = 509 Settings::values.emulate_analog_keyboard =
509 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); 510 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
511 Settings::values.mouse_panning = ReadSetting(QStringLiteral("mouse_panning"), false).toBool();
512 Settings::values.mouse_panning_sensitivity =
513 ReadSetting(QStringLiteral("mouse_panning_sensitivity"), 1).toFloat();
510 514
511 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true); 515 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
512 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), 516 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
@@ -778,14 +782,14 @@ void Config::ReadRendererValues() {
778 ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); 782 ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100);
779 ReadSettingGlobal(Settings::values.use_disk_shader_cache, 783 ReadSettingGlobal(Settings::values.use_disk_shader_cache,
780 QStringLiteral("use_disk_shader_cache"), true); 784 QStringLiteral("use_disk_shader_cache"), true);
781 ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); 785 ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
782 ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, 786 ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
783 QStringLiteral("use_asynchronous_gpu_emulation"), true); 787 QStringLiteral("use_asynchronous_gpu_emulation"), true);
784 ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), 788 ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
785 true); 789 true);
786 ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); 790 ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true);
787 ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), 791 ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"),
788 true); 792 false);
789 ReadSettingGlobal(Settings::values.use_asynchronous_shaders, 793 ReadSettingGlobal(Settings::values.use_asynchronous_shaders,
790 QStringLiteral("use_asynchronous_shaders"), false); 794 QStringLiteral("use_asynchronous_shaders"), false);
791 ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), 795 ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
@@ -1184,7 +1188,9 @@ void Config::SaveControlValues() {
1184 WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false); 1188 WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false);
1185 WriteSetting(QStringLiteral("emulate_analog_keyboard"), 1189 WriteSetting(QStringLiteral("emulate_analog_keyboard"),
1186 Settings::values.emulate_analog_keyboard, false); 1190 Settings::values.emulate_analog_keyboard, false);
1187 1191 WriteSetting(QStringLiteral("mouse_panning"), Settings::values.mouse_panning, false);
1192 WriteSetting(QStringLiteral("mouse_panning_sensitivity"),
1193 Settings::values.mouse_panning_sensitivity, 1.0f);
1188 qt_config->endGroup(); 1194 qt_config->endGroup();
1189} 1195}
1190 1196
@@ -1345,14 +1351,14 @@ void Config::SaveRendererValues() {
1345 Settings::values.use_disk_shader_cache, true); 1351 Settings::values.use_disk_shader_cache, true);
1346 WriteSettingGlobal(QStringLiteral("gpu_accuracy"), 1352 WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
1347 static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), 1353 static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
1348 Settings::values.gpu_accuracy.UsingGlobal(), 0); 1354 Settings::values.gpu_accuracy.UsingGlobal(), 1);
1349 WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), 1355 WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
1350 Settings::values.use_asynchronous_gpu_emulation, true); 1356 Settings::values.use_asynchronous_gpu_emulation, true);
1351 WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, 1357 WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
1352 true); 1358 true);
1353 WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); 1359 WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
1354 WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), 1360 WriteSettingGlobal(QStringLiteral("use_assembly_shaders"),
1355 Settings::values.use_assembly_shaders, true); 1361 Settings::values.use_assembly_shaders, false);
1356 WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), 1362 WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"),
1357 Settings::values.use_asynchronous_shaders, false); 1363 Settings::values.use_asynchronous_shaders, false);
1358 WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, 1364 WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 8a600e19d..949c4eb13 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,7 +42,7 @@ public:
42 default_mouse_buttons; 42 default_mouse_buttons;
43 static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys; 43 static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
44 static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; 44 static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
45 static const std::array<UISettings::Shortcut, 16> default_hotkeys; 45 static const std::array<UISettings::Shortcut, 17> default_hotkeys;
46 46
47private: 47private:
48 void Initialize(const std::string& config_name); 48 void Initialize(const std::string& config_name);
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index b78a5dff0..9ff32aec4 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5// Include this early so the Vulkan headers are included the way we want
6#include "video_core/vulkan_common/vulkan_wrapper.h"
7
5#include <QColorDialog> 8#include <QColorDialog>
6#include <QComboBox> 9#include <QComboBox>
7#include <QVulkanInstance> 10#include <QVulkanInstance>
@@ -11,7 +14,8 @@
11#include "core/core.h" 14#include "core/core.h"
12#include "core/settings.h" 15#include "core/settings.h"
13#include "ui_configure_graphics.h" 16#include "ui_configure_graphics.h"
14#include "video_core/renderer_vulkan/renderer_vulkan.h" 17#include "video_core/vulkan_common/vulkan_instance.h"
18#include "video_core/vulkan_common/vulkan_library.h"
15#include "yuzu/configuration/configuration_shared.h" 19#include "yuzu/configuration/configuration_shared.h"
16#include "yuzu/configuration/configure_graphics.h" 20#include "yuzu/configuration/configure_graphics.h"
17 21
@@ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
212 ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); 216 ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn());
213} 217}
214 218
215void ConfigureGraphics::RetrieveVulkanDevices() { 219void ConfigureGraphics::RetrieveVulkanDevices() try {
220 using namespace Vulkan;
221
222 vk::InstanceDispatch dld;
223 const Common::DynamicLibrary library = OpenLibrary();
224 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
225 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
226
216 vulkan_devices.clear(); 227 vulkan_devices.clear();
217 for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { 228 vulkan_devices.reserve(physical_devices.size());
229 for (const VkPhysicalDevice device : physical_devices) {
230 const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName;
218 vulkan_devices.push_back(QString::fromStdString(name)); 231 vulkan_devices.push_back(QString::fromStdString(name));
219 } 232 }
233
234} catch (const Vulkan::vk::Exception& exception) {
235 LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
220} 236}
221 237
222Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { 238Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
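
RetrieveVulkanDevices now uses a function-try-block: the try spans the entire body and the catch replaces whatever the body did not finish, here logging instead of propagating vk::Exception. A minimal sketch of the syntax:

    #include <stdexcept>
    #include <vector>

    std::vector<int> LoadValues() try {
        std::vector<int> values{1, 2, 3};
        values.at(10); // throws std::out_of_range
        return values;
    } catch (const std::exception&) {
        return {}; // fall back to an empty result, as the device list does
    }
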
diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp
index 4e557bc6f..a1a0eb676 100644
--- a/src/yuzu/configuration/configure_input_advanced.cpp
+++ b/src/yuzu/configuration/configure_input_advanced.cpp
@@ -122,6 +122,9 @@ void ConfigureInputAdvanced::ApplyConfiguration() {
122 Settings::values.mouse_enabled = ui->mouse_enabled->isChecked(); 122 Settings::values.mouse_enabled = ui->mouse_enabled->isChecked();
123 Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked(); 123 Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked();
124 Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked(); 124 Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked();
125 Settings::values.mouse_panning = ui->mouse_panning->isChecked();
126 Settings::values.mouse_panning_sensitivity =
127 static_cast<float>(ui->mouse_panning_sensitivity->value());
125 Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked(); 128 Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked();
126} 129}
127 130
@@ -149,6 +152,8 @@ void ConfigureInputAdvanced::LoadConfiguration() {
149 ui->mouse_enabled->setChecked(Settings::values.mouse_enabled); 152 ui->mouse_enabled->setChecked(Settings::values.mouse_enabled);
150 ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled); 153 ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled);
151 ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard); 154 ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard);
155 ui->mouse_panning->setChecked(Settings::values.mouse_panning);
156 ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity);
152 ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled); 157 ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled);
153 158
154 UpdateUIEnabled(); 159 UpdateUIEnabled();
diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui
index f207e5d3b..173130d8d 100644
--- a/src/yuzu/configuration/configure_input_advanced.ui
+++ b/src/yuzu/configuration/configure_input_advanced.ui
@@ -2546,27 +2546,65 @@
2546 </property> 2546 </property>
2547 </widget> 2547 </widget>
2548 </item> 2548 </item>
2549 <item row="1" column="0"> 2549 <item row="1" column="0">
2550 <widget class="QCheckBox" name="emulate_analog_keyboard"> 2550 <widget class="QCheckBox" name="emulate_analog_keyboard">
2551 <property name="minimumSize"> 2551 <property name="minimumSize">
2552 <size> 2552 <size>
2553 <width>0</width> 2553 <width>0</width>
2554 <height>23</height> 2554 <height>23</height>
2555 </size> 2555 </size>
2556 </property> 2556 </property>
2557 <property name="text"> 2557 <property name="text">
2558 <string>Emulate Analog with Keyboard Input</string> 2558 <string>Emulate Analog with Keyboard Input</string>
2559 </property> 2559 </property>
2560 </widget> 2560 </widget>
2561 </item> 2561 </item>
2562 <item row="5" column="2"> 2562 <item row="2" column="0">
2563 <widget class="QCheckBox" name="mouse_panning">
2564 <property name="minimumSize">
2565 <size>
2566 <width>0</width>
2567 <height>23</height>
2568 </size>
2569 </property>
2570 <property name="text">
2571 <string>Enable mouse panning</string>
2572 </property>
2573 </widget>
2574 </item>
2575 <item row="2" column="2">
2576 <widget class="QDoubleSpinBox" name="mouse_panning_sensitivity">
2577 <property name="toolTip">
2578 <string>Mouse sensitivity</string>
2579 </property>
2580 <property name="alignment">
2581 <set>Qt::AlignCenter</set>
2582 </property>
2583 <property name="decimals">
2584 <number>2</number>
2585 </property>
2586 <property name="minimum">
2587 <double>0.100000000000000</double>
2588 </property>
2589 <property name="maximum">
2590 <double>16.000000000000000</double>
2591 </property>
2592 <property name="singleStep">
2593 <double>0.010000000000000</double>
2594 </property>
2595 <property name="value">
2596 <double>1.000000000000000</double>
2597 </property>
2598 </widget>
2599 </item>
2600 <item row="6" column="2">
2563 <widget class="QPushButton" name="touchscreen_advanced"> 2601 <widget class="QPushButton" name="touchscreen_advanced">
2564 <property name="text"> 2602 <property name="text">
2565 <string>Advanced</string> 2603 <string>Advanced</string>
2566 </property> 2604 </property>
2567 </widget> 2605 </widget>
2568 </item> 2606 </item>
2569 <item row="2" column="1"> 2607 <item row="3" column="1">
2570 <spacer name="horizontalSpacer_8"> 2608 <spacer name="horizontalSpacer_8">
2571 <property name="orientation"> 2609 <property name="orientation">
2572 <enum>Qt::Horizontal</enum> 2610 <enum>Qt::Horizontal</enum>
@@ -2582,21 +2620,21 @@
2582 </property> 2620 </property>
2583 </spacer> 2621 </spacer>
2584 </item> 2622 </item>
2585 <item row="2" column="2"> 2623 <item row="3" column="2">
2586 <widget class="QPushButton" name="mouse_advanced"> 2624 <widget class="QPushButton" name="mouse_advanced">
2587 <property name="text"> 2625 <property name="text">
2588 <string>Advanced</string> 2626 <string>Advanced</string>
2589 </property> 2627 </property>
2590 </widget> 2628 </widget>
2591 </item> 2629 </item>
2592 <item row="5" column="0"> 2630 <item row="6" column="0">
2593 <widget class="QCheckBox" name="touchscreen_enabled"> 2631 <widget class="QCheckBox" name="touchscreen_enabled">
2594 <property name="text"> 2632 <property name="text">
2595 <string>Touchscreen</string> 2633 <string>Touchscreen</string>
2596 </property> 2634 </property>
2597 </widget> 2635 </widget>
2598 </item> 2636 </item>
2599 <item row="2" column="0"> 2637 <item row="3" column="0">
2600 <widget class="QCheckBox" name="mouse_enabled"> 2638 <widget class="QCheckBox" name="mouse_enabled">
2601 <property name="minimumSize"> 2639 <property name="minimumSize">
2602 <size> 2640 <size>
@@ -2609,28 +2647,28 @@
2609 </property> 2647 </property>
2610 </widget> 2648 </widget>
2611 </item> 2649 </item>
2612 <item row="7" column="0"> 2650 <item row="8" column="0">
2613 <widget class="QLabel" name="motion_touch"> 2651 <widget class="QLabel" name="motion_touch">
2614 <property name="text"> 2652 <property name="text">
2615 <string>Motion / Touch</string> 2653 <string>Motion / Touch</string>
2616 </property> 2654 </property>
2617 </widget> 2655 </widget>
2618 </item> 2656 </item>
2619 <item row="7" column="2"> 2657 <item row="8" column="2">
2620 <widget class="QPushButton" name="buttonMotionTouch"> 2658 <widget class="QPushButton" name="buttonMotionTouch">
2621 <property name="text"> 2659 <property name="text">
2622 <string>Configure</string> 2660 <string>Configure</string>
2623 </property> 2661 </property>
2624 </widget> 2662 </widget>
2625 </item> 2663 </item>
2626 <item row="6" column="0"> 2664 <item row="7" column="0">
2627 <widget class="QCheckBox" name="debug_enabled"> 2665 <widget class="QCheckBox" name="debug_enabled">
2628 <property name="text"> 2666 <property name="text">
2629 <string>Debug Controller</string> 2667 <string>Debug Controller</string>
2630 </property> 2668 </property>
2631 </widget> 2669 </widget>
2632 </item> 2670 </item>
2633 <item row="6" column="2"> 2671 <item row="7" column="2">
2634 <widget class="QPushButton" name="debug_configure"> 2672 <widget class="QPushButton" name="debug_configure">
2635 <property name="text"> 2673 <property name="text">
2636 <string>Configure</string> 2674 <string>Configure</string>
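
The configure_input_player_widget.cpp hunks that follow repeatedly apply one pattern: a flat float array of (x, y) pairs describes the left half of a controller, and the right half is the same polygon with x negated about a shared center. A hypothetical helper capturing that pattern (not the project's code):

    #include <array>
    #include <QPointF>

    // Mirror a packed (x, y) polygon around `center`; negating x yields the
    // right-hand counterpart of a left-hand shape.
    template <std::size_t N>
    std::array<QPointF, N / 2> BuildHalf(const std::array<float, N>& points,
                                         QPointF center, bool mirror) {
        std::array<QPointF, N / 2> out;
        for (std::size_t i = 0; i < N / 2; ++i) {
            const float x = points[i * 2 + 0];
            const float y = points[i * 2 + 1];
            out[i] = center + QPointF(mirror ? -x : x, y);
        }
        return out;
    }
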
diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp
index b8bcb44a4..61ba91cef 100644
--- a/src/yuzu/configuration/configure_input_player_widget.cpp
+++ b/src/yuzu/configuration/configure_input_player_widget.cpp
@@ -37,7 +37,8 @@ void PlayerControlPreview::SetPlayerInput(std::size_t index, const ButtonParam&
37 Input::CreateDevice<Input::AnalogDevice>); 37 Input::CreateDevice<Input::AnalogDevice>);
38 UpdateColors(); 38 UpdateColors();
39} 39}
40void PlayerControlPreview::SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_, 40void PlayerControlPreview::SetPlayerInputRaw(std::size_t index,
41 const Settings::ButtonsRaw& buttons_,
41 Settings::AnalogsRaw analogs_) { 42 Settings::AnalogsRaw analogs_) {
42 player_index = index; 43 player_index = index;
43 std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN, 44 std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN,
@@ -520,14 +521,15 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center)
520 { 521 {
521 // Draw joysticks 522 // Draw joysticks
522 using namespace Settings::NativeAnalog; 523 using namespace Settings::NativeAnalog;
523 DrawJoystick(p, center + QPointF(-65, -65) + (axis_values[LStick].value * 7), 1.62f, 524 const auto& l_stick = axis_values[LStick];
524 button_values[Settings::NativeButton::LStick]); 525 const auto l_button = button_values[Settings::NativeButton::LStick];
525 DrawJoystick(p, center + QPointF(65, 12) + (axis_values[RStick].value * 7), 1.62f, 526 const auto& r_stick = axis_values[RStick];
526 button_values[Settings::NativeButton::RStick]); 527 const auto r_button = button_values[Settings::NativeButton::RStick];
527 DrawRawJoystick(p, center + QPointF(-180, 90), axis_values[LStick].raw_value, 528
528 axis_values[LStick].properties); 529 DrawJoystick(p, center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button);
529 DrawRawJoystick(p, center + QPointF(180, 90), axis_values[RStick].raw_value, 530 DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button);
530 axis_values[RStick].properties); 531 DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties);
532 DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties);
531 } 533 }
532 534
533 using namespace Settings::NativeButton; 535 using namespace Settings::NativeButton;
@@ -606,14 +608,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen
606 { 608 {
607 // Draw joysticks 609 // Draw joysticks
608 using namespace Settings::NativeAnalog; 610 using namespace Settings::NativeAnalog;
609 DrawJoystick(p, center + QPointF(-171, -41) + (axis_values[LStick].value * 4), 1.0f, 611 const auto& l_stick = axis_values[LStick];
610 button_values[Settings::NativeButton::LStick]); 612 const auto l_button = button_values[Settings::NativeButton::LStick];
611 DrawJoystick(p, center + QPointF(171, 8) + (axis_values[RStick].value * 4), 1.0f, 613 const auto& r_stick = axis_values[RStick];
612 button_values[Settings::NativeButton::RStick]); 614 const auto r_button = button_values[Settings::NativeButton::RStick];
613 DrawRawJoystick(p, center + QPointF(-50, 0), axis_values[LStick].raw_value, 615
614 axis_values[LStick].properties); 616 DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button);
615 DrawRawJoystick(p, center + QPointF(50, 0), axis_values[RStick].raw_value, 617 DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button);
616 axis_values[RStick].properties); 618 DrawRawJoystick(p, center + QPointF(-50, 0), l_stick.raw_value, l_stick.properties);
619 DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties);
617 } 620 }
618 621
619 using namespace Settings::NativeButton; 622 using namespace Settings::NativeButton;
@@ -702,9 +705,9 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center)
702 { 705 {
703 // Draw joysticks 706 // Draw joysticks
704 using namespace Settings::NativeAnalog; 707 using namespace Settings::NativeAnalog;
705 DrawProJoystick(p, center + QPointF(-111, -55) + (axis_values[LStick].value * 11), 708 DrawProJoystick(p, center + QPointF(-111, -55), axis_values[LStick].value, 11,
706 button_values[Settings::NativeButton::LStick]); 709 button_values[Settings::NativeButton::LStick]);
707 DrawProJoystick(p, center + QPointF(51, 0) + (axis_values[RStick].value * 11), 710 DrawProJoystick(p, center + QPointF(51, 0), axis_values[RStick].value, 11,
708 button_values[Settings::NativeButton::RStick]); 711 button_values[Settings::NativeButton::RStick]);
709 DrawRawJoystick(p, center + QPointF(-50, 105), axis_values[LStick].raw_value, 712 DrawRawJoystick(p, center + QPointF(-50, 105), axis_values[LStick].raw_value,
710 axis_values[LStick].properties); 713 axis_values[LStick].properties);
@@ -1005,12 +1008,6 @@ constexpr std::array<float, 3 * 2> up_arrow_symbol = {
1005 0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f, 1008 0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f,
1006}; 1009};
1007 1010
1008constexpr std::array<float, 13 * 2> up_arrow = {
1009 9.4f, -9.8f, 9.4f, -10.2f, 8.9f, -29.8f, 8.5f, -30.0f, 8.1f,
1010 -30.1f, 7.7f, -30.1f, -8.6f, -30.0f, -9.0f, -29.8f, -9.3f, -29.5f,
1011 -9.5f, -29.1f, -9.5f, -28.7f, -9.1f, -9.1f, -8.8f, -8.8f,
1012};
1013
1014constexpr std::array<float, 64 * 2> trigger_button = { 1011constexpr std::array<float, 64 * 2> trigger_button = {
1015 5.5f, -12.6f, 5.8f, -12.6f, 6.7f, -12.5f, 8.1f, -12.3f, 8.6f, -12.2f, 9.2f, -12.0f, 1012 5.5f, -12.6f, 5.8f, -12.6f, 6.7f, -12.5f, 8.1f, -12.3f, 8.6f, -12.2f, 9.2f, -12.0f,
1016 9.5f, -11.9f, 9.9f, -11.8f, 10.6f, -11.5f, 11.0f, -11.3f, 11.2f, -11.2f, 11.4f, -11.1f, 1013 9.5f, -11.9f, 9.9f, -11.8f, 10.6f, -11.5f, 11.0f, -11.3f, 11.2f, -11.2f, 11.4f, -11.1f,
@@ -1460,15 +1457,18 @@ void PlayerControlPreview::DrawProBody(QPainter& p, const QPointF center) {
1460 constexpr int radius1 = 32; 1457 constexpr int radius1 = 32;
1461 1458
1462 for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) { 1459 for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) {
1463 qleft_handle[point] = 1460 const float left_x = pro_left_handle[point * 2 + 0];
1464 center + QPointF(pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]); 1461 const float left_y = pro_left_handle[point * 2 + 1];
1465 qright_handle[point] = 1462
1466 center + QPointF(-pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]); 1463 qleft_handle[point] = center + QPointF(left_x, left_y);
1464 qright_handle[point] = center + QPointF(-left_x, left_y);
1467 } 1465 }
1468 for (std::size_t point = 0; point < pro_body.size() / 2; ++point) { 1466 for (std::size_t point = 0; point < pro_body.size() / 2; ++point) {
1469 qbody[point] = center + QPointF(pro_body[point * 2], pro_body[point * 2 + 1]); 1467 const float body_x = pro_body[point * 2 + 0];
1470 qbody[pro_body.size() - 1 - point] = 1468 const float body_y = pro_body[point * 2 + 1];
1471 center + QPointF(-pro_body[point * 2], pro_body[point * 2 + 1]); 1469
1470 qbody[point] = center + QPointF(body_x, body_y);
1471 qbody[pro_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
1472 } 1472 }
1473 1473
1474 // Draw left handle body 1474 // Draw left handle body
@@ -1499,21 +1499,25 @@ void PlayerControlPreview::DrawGCBody(QPainter& p, const QPointF center) {
1499 constexpr float angle = 2 * 3.1415f / 8; 1499 constexpr float angle = 2 * 3.1415f / 8;
1500 1500
1501 for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) { 1501 for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) {
1502 qleft_handle[point] = 1502 const float body_x = gc_left_body[point * 2 + 0];
1503 center + QPointF(gc_left_body[point * 2], gc_left_body[point * 2 + 1]); 1503 const float body_y = gc_left_body[point * 2 + 1];
1504 qright_handle[point] = 1504
1505 center + QPointF(-gc_left_body[point * 2], gc_left_body[point * 2 + 1]); 1505 qleft_handle[point] = center + QPointF(body_x, body_y);
1506 qright_handle[point] = center + QPointF(-body_x, body_y);
1506 } 1507 }
1507 for (std::size_t point = 0; point < gc_body.size() / 2; ++point) { 1508 for (std::size_t point = 0; point < gc_body.size() / 2; ++point) {
1508 qbody[point] = center + QPointF(gc_body[point * 2], gc_body[point * 2 + 1]); 1509 const float body_x = gc_body[point * 2 + 0];
1509 qbody[gc_body.size() - 1 - point] = 1510 const float body_y = gc_body[point * 2 + 1];
1510 center + QPointF(-gc_body[point * 2], gc_body[point * 2 + 1]); 1511
1512 qbody[point] = center + QPointF(body_x, body_y);
1513 qbody[gc_body.size() - 1 - point] = center + QPointF(-body_x, body_y);
1511 } 1514 }
1512 for (std::size_t point = 0; point < 8; ++point) { 1515 for (std::size_t point = 0; point < 8; ++point) {
1513 left_hex[point] = 1516 const float point_cos = std::cos(point * angle);
1514 center + QPointF(34 * std::cos(point * angle) - 111, 34 * std::sin(point * angle) - 44); 1517 const float point_sin = std::sin(point * angle);
1515 right_hex[point] = 1518
1516 center + QPointF(26 * std::cos(point * angle) + 61, 26 * std::sin(point * angle) + 37); 1519 left_hex[point] = center + QPointF(34 * point_cos - 111, 34 * point_sin - 44);
1520 right_hex[point] = center + QPointF(26 * point_cos + 61, 26 * point_sin + 37);
1517 } 1521 }
1518 1522
1519 // Draw body 1523 // Draw body
@@ -1634,32 +1638,36 @@ void PlayerControlPreview::DrawDualBody(QPainter& p, const QPointF center) {
1634 constexpr float offset = 209.3f; 1638 constexpr float offset = 209.3f;
1635 1639
1636 for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) { 1640 for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) {
1637 left_joycon[point] = center + QPointF(left_joycon_body[point * 2] * size + offset, 1641 const float body_x = left_joycon_body[point * 2 + 0];
1638 left_joycon_body[point * 2 + 1] * size - 1); 1642 const float body_y = left_joycon_body[point * 2 + 1];
1639 right_joycon[point] = center + QPointF(-left_joycon_body[point * 2] * size - offset, 1643
1640 left_joycon_body[point * 2 + 1] * size - 1); 1644 left_joycon[point] = center + QPointF(body_x * size + offset, body_y * size - 1);
1645 right_joycon[point] = center + QPointF(-body_x * size - offset, body_y * size - 1);
1641 } 1646 }
1642 for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) { 1647 for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) {
1643 qleft_joycon_slider[point] = 1648 const float slider_x = left_joycon_slider[point * 2 + 0];
1644 center + QPointF(left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]); 1649 const float slider_y = left_joycon_slider[point * 2 + 1];
1645 qright_joycon_slider[point] = 1650
1646 center + QPointF(-left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]); 1651 qleft_joycon_slider[point] = center + QPointF(slider_x, slider_y);
1652 qright_joycon_slider[point] = center + QPointF(-slider_x, slider_y);
1647 } 1653 }
1648 for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) { 1654 for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) {
1655 const float top_view_x = left_joycon_topview[point * 2 + 0];
1656 const float top_view_y = left_joycon_topview[point * 2 + 1];
1657
1649 qleft_joycon_topview[point] = 1658 qleft_joycon_topview[point] =
1650 center + QPointF(left_joycon_topview[point * 2] * size2 - 52, 1659 center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
1651 left_joycon_topview[point * 2 + 1] * size2 - 52);
1652 qright_joycon_topview[point] = 1660 qright_joycon_topview[point] =
1653 center + QPointF(-left_joycon_topview[point * 2] * size2 + 52, 1661 center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
1654 left_joycon_topview[point * 2 + 1] * size2 - 52);
1655 } 1662 }
1656 for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) { 1663 for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) {
1664 const float top_view_x = left_joycon_slider_topview[point * 2 + 0];
1665 const float top_view_y = left_joycon_slider_topview[point * 2 + 1];
1666
1657 qleft_joycon_slider_topview[point] = 1667 qleft_joycon_slider_topview[point] =
1658 center + QPointF(left_joycon_slider_topview[point * 2] * size2 - 52, 1668 center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52);
1659 left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
1660 qright_joycon_slider_topview[point] = 1669 qright_joycon_slider_topview[point] =
1661 center + QPointF(-left_joycon_slider_topview[point * 2] * size2 + 52, 1670 center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52);
1662 left_joycon_slider_topview[point * 2 + 1] * size2 - 52);
1663 } 1671 }
1664 1672
1665 // right joycon body 1673 // right joycon body
@@ -1908,18 +1916,19 @@ void PlayerControlPreview::DrawProTriggers(QPainter& p, const QPointF center, bo
1908 std::array<QPointF, pro_body_top.size()> qbody_top; 1916 std::array<QPointF, pro_body_top.size()> qbody_top;
1909 1917
1910 for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) { 1918 for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) {
1911 qleft_trigger[point] = 1919 const float trigger_x = pro_left_trigger[point * 2 + 0];
1912 center + QPointF(pro_left_trigger[point * 2], 1920 const float trigger_y = pro_left_trigger[point * 2 + 1];
1913 pro_left_trigger[point * 2 + 1] + (left_pressed ? 2 : 0)); 1921
1914 qright_trigger[point] = 1922 qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 2 : 0));
1915 center + QPointF(-pro_left_trigger[point * 2], 1923 qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 2 : 0));
1916 pro_left_trigger[point * 2 + 1] + (right_pressed ? 2 : 0));
1917 } 1924 }
1918 1925
1919 for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) { 1926 for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) {
1920 qbody_top[pro_body_top.size() - 1 - point] = 1927 const float top_x = pro_body_top[point * 2 + 0];
1921 center + QPointF(-pro_body_top[point * 2], pro_body_top[point * 2 + 1]); 1928 const float top_y = pro_body_top[point * 2 + 1];
1922 qbody_top[point] = center + QPointF(pro_body_top[point * 2], pro_body_top[point * 2 + 1]); 1929
1930 qbody_top[pro_body_top.size() - 1 - point] = center + QPointF(-top_x, top_y);
1931 qbody_top[point] = center + QPointF(top_x, top_y);
1923 } 1932 }
1924 1933
1925 // Pro body detail 1934 // Pro body detail
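
DrawProTriggers builds the top outline symmetrically in a single pass: indices [0, N/2) receive the source points, and indices [N/2, N) receive their mirror images in reverse order, so the resulting polygon is closed with a consistent winding. A sketch of that two-ended fill (hypothetical names):

    #include <array>
    #include <cstddef>

    struct PointF {
        float x;
        float y;
    };

    // N interleaved floats describe N/2 points of one half; the full outline
    // has N points once the mirrored half is appended in reverse.
    template <std::size_t N>
    std::array<PointF, N> BuildSymmetricOutline(const std::array<float, N>& half) {
        std::array<PointF, N> outline{};
        for (std::size_t point = 0; point < N / 2; ++point) {
            const float x = half[point * 2 + 0];
            const float y = half[point * 2 + 1];
            outline[point] = {x, y};          // forward: source half
            outline[N - 1 - point] = {-x, y}; // backward: mirrored half
        }
        return outline;
    }
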
@@ -1942,12 +1951,11 @@ void PlayerControlPreview::DrawGCTriggers(QPainter& p, const QPointF center, boo
1942 std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger; 1951 std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger;
1943 1952
1944 for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) { 1953 for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) {
1945 qleft_trigger[point] = 1954 const float trigger_x = left_gc_trigger[point * 2 + 0];
1946 center + QPointF(left_gc_trigger[point * 2], 1955 const float trigger_y = left_gc_trigger[point * 2 + 1];
1947 left_gc_trigger[point * 2 + 1] + (left_pressed ? 10 : 0)); 1956
1948 qright_trigger[point] = 1957 qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 10 : 0));
1949 center + QPointF(-left_gc_trigger[point * 2], 1958 qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 10 : 0));
1950 left_gc_trigger[point * 2 + 1] + (right_pressed ? 10 : 0));
1951 } 1959 }
1952 1960
1953 // Left trigger 1961 // Left trigger
@@ -1976,12 +1984,13 @@ void PlayerControlPreview::DrawHandheldTriggers(QPainter& p, const QPointF cente
1976 std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger; 1984 std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger;
1977 1985
1978 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) { 1986 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
1987 const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
1988 const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
1989
1979 qleft_trigger[point] = 1990 qleft_trigger[point] =
1980 center + QPointF(left_joycon_trigger[point * 2], 1991 center + QPointF(left_trigger_x, left_trigger_y + (left_pressed ? 0.5f : 0));
1981 left_joycon_trigger[point * 2 + 1] + (left_pressed ? 0.5f : 0));
1982 qright_trigger[point] = 1992 qright_trigger[point] =
1983 center + QPointF(-left_joycon_trigger[point * 2], 1993 center + QPointF(-left_trigger_x, left_trigger_y + (right_pressed ? 0.5f : 0));
1984 left_joycon_trigger[point * 2 + 1] + (right_pressed ? 0.5f : 0));
1985 } 1994 }
1986 1995
1987 // Left trigger 1996 // Left trigger
@@ -2001,12 +2010,14 @@ void PlayerControlPreview::DrawDualTriggers(QPainter& p, const QPointF center, b
2001 constexpr float size = 1.62f; 2010 constexpr float size = 1.62f;
2002 constexpr float offset = 210.6f; 2011 constexpr float offset = 210.6f;
2003 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) { 2012 for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
2004 qleft_trigger[point] = 2013 const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
2005 center + QPointF(left_joycon_trigger[point * 2] * size + offset, 2014 const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
2006 left_joycon_trigger[point * 2 + 1] * size + (left_pressed ? 0.5f : 0)); 2015
2007 qright_trigger[point] = center + QPointF(-left_joycon_trigger[point * 2] * size - offset, 2016 qleft_trigger[point] = center + QPointF(left_trigger_x * size + offset,
2008 left_joycon_trigger[point * 2 + 1] * size + 2017 left_trigger_y * size + (left_pressed ? 0.5f : 0));
2009 (right_pressed ? 0.5f : 0)); 2018 qright_trigger[point] =
2019 center + QPointF(-left_trigger_x * size - offset,
2020 left_trigger_y * size + (right_pressed ? 0.5f : 0));
2010 } 2021 }
2011 2022
2012 // Left trigger 2023 // Left trigger
@@ -2026,13 +2037,16 @@ void PlayerControlPreview::DrawDualTriggersTopView(QPainter& p, const QPointF ce
2026 constexpr float size = 0.9f; 2037 constexpr float size = 0.9f;
2027 2038
2028 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) { 2039 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
2029 qleft_trigger[point] = center + QPointF(left_joystick_L_topview[point * 2] * size - 50, 2040 const float top_view_x = left_joystick_L_topview[point * 2 + 0];
2030 left_joystick_L_topview[point * 2 + 1] * size - 52); 2041 const float top_view_y = left_joystick_L_topview[point * 2 + 1];
2042
2043 qleft_trigger[point] = center + QPointF(top_view_x * size - 50, top_view_y * size - 52);
2031 } 2044 }
2032 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) { 2045 for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
2033 qright_trigger[point] = 2046 const float top_view_x = left_joystick_L_topview[point * 2 + 0];
2034 center + QPointF(-left_joystick_L_topview[point * 2] * size + 50, 2047 const float top_view_y = left_joystick_L_topview[point * 2 + 1];
2035 left_joystick_L_topview[point * 2 + 1] * size - 52); 2048
2049 qright_trigger[point] = center + QPointF(-top_view_x * size + 50, top_view_y * size - 52);
2036 } 2050 }
2037 2051
2038 p.setPen(colors.outline); 2052 p.setPen(colors.outline);
@@ -2276,15 +2290,39 @@ void PlayerControlPreview::DrawJoystickSideview(QPainter& p, const QPointF cente
2276 p.drawLine(p2.at(32), p2.at(71)); 2290 p.drawLine(p2.at(32), p2.at(71));
2277} 2291}
2278 2292
2279void PlayerControlPreview::DrawProJoystick(QPainter& p, const QPointF center, bool pressed) { 2293void PlayerControlPreview::DrawProJoystick(QPainter& p, const QPointF center, const QPointF offset,
2294 float offset_scalar, bool pressed) {
2295 const float radius1 = 24.0f;
2296 const float radius2 = 17.0f;
2297
2298 const QPointF offset_center = center + offset * offset_scalar;
2299
2300 const auto amplitude = static_cast<float>(
2301 1.0 - std::sqrt((offset.x() * offset.x()) + (offset.y() * offset.y())) * 0.1f);
2302
2303 const float rotation =
2304 ((offset.x() == 0) ? atan(1) * 2 : atan(offset.y() / offset.x())) * (180 / (atan(1) * 4));
2305
2306 p.save();
2307 p.translate(offset_center);
2308 p.rotate(rotation);
2309
2280 // Outer circle 2310 // Outer circle
2281 p.setPen(colors.outline); 2311 p.setPen(colors.outline);
2282 p.setBrush(pressed ? colors.highlight : colors.button); 2312 p.setBrush(pressed ? colors.highlight : colors.button);
2283 DrawCircle(p, center, 24.0f); 2313 p.drawEllipse(QPointF(0, 0), radius1 * amplitude, radius1);
2284 2314
2285 // Inner circle 2315 // Inner circle
2286 p.setBrush(pressed ? colors.highlight2 : colors.button2); 2316 p.setBrush(pressed ? colors.highlight2 : colors.button2);
2287 DrawCircle(p, center, 17.0f); 2317
2318 const float inner_offset =
2319 (radius1 - radius2) * 0.4f * ((offset.x() == 0 && offset.y() < 0) ? -1.0f : 1.0f);
2320 const float offset_factor = (1.0f - amplitude) / 0.1f;
2321
2322 p.drawEllipse(QPointF((offset.x() < 0) ? -inner_offset : inner_offset, 0) * offset_factor,
2323 radius2 * amplitude, radius2);
2324
2325 p.restore();
2288} 2326}
2289 2327
2290void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, bool pressed) { 2328void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, bool pressed) {
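
The rewritten DrawProJoystick fakes a 3D tilt: amplitude shrinks the ellipse's horizontal radius by up to roughly 10% at full deflection, and rotation aligns that squash axis with the stick direction. Note that atan(1) * 4 is pi, so 180 / (atan(1) * 4) is simply the radians-to-degrees factor. The same angle can be computed with std::atan2, shown here as a sketch rather than a drop-in replacement (atan2 returns the full ±180° range, while the code above keeps ±90° and handles direction through the sign of offset.x()):

    #include <cmath>
    #include <numbers> // C++20

    // Angle of the deflection vector in degrees, for QPainter::rotate().
    float StickAngleDegrees(float x, float y) {
        return std::atan2(y, x) * (180.0f / std::numbers::pi_v<float>);
    }

    // Horizontal squash factor: 1.0 at rest, ~0.9 at full deflection.
    float StickAmplitude(float x, float y) {
        return 1.0f - std::hypot(x, y) * 0.1f;
    }
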
@@ -2302,7 +2340,7 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo
2302} 2340}
2303 2341
2304void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value, 2342void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value,
2305 const Input::AnalogProperties properties) { 2343 const Input::AnalogProperties& properties) {
2306 constexpr float size = 45.0f; 2344 constexpr float size = 45.0f;
2307 const float range = size * properties.range; 2345 const float range = size * properties.range;
2308 const float deadzone = size * properties.deadzone; 2346 const float deadzone = size * properties.deadzone;
@@ -2425,17 +2463,16 @@ void PlayerControlPreview::DrawArrowButtonOutline(QPainter& p, const QPointF cen
2425 std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline; 2463 std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline;
2426 2464
2427 for (std::size_t point = 0; point < arrow_points - 1; ++point) { 2465 for (std::size_t point = 0; point < arrow_points - 1; ++point) {
2428 arrow_button_outline[point] = center + QPointF(up_arrow_button[point * 2] * size, 2466 const float up_arrow_x = up_arrow_button[point * 2 + 0];
2429 up_arrow_button[point * 2 + 1] * size); 2467 const float up_arrow_y = up_arrow_button[point * 2 + 1];
2468
2469 arrow_button_outline[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
2430 arrow_button_outline[(arrow_points - 1) * 2 - point - 1] = 2470 arrow_button_outline[(arrow_points - 1) * 2 - point - 1] =
2431 center + 2471 center + QPointF(up_arrow_y * size, up_arrow_x * size);
2432 QPointF(up_arrow_button[point * 2 + 1] * size, up_arrow_button[point * 2] * size);
2433 arrow_button_outline[(arrow_points - 1) * 2 + point] = 2472 arrow_button_outline[(arrow_points - 1) * 2 + point] =
2434 center + 2473 center + QPointF(-up_arrow_x * size, -up_arrow_y * size);
2435 QPointF(-up_arrow_button[point * 2] * size, -up_arrow_button[point * 2 + 1] * size);
2436 arrow_button_outline[(arrow_points - 1) * 4 - point - 1] = 2474 arrow_button_outline[(arrow_points - 1) * 4 - point - 1] =
2437 center + 2475 center + QPointF(-up_arrow_y * size, -up_arrow_x * size);
2438 QPointF(-up_arrow_button[point * 2 + 1] * size, -up_arrow_button[point * 2] * size);
2439 } 2476 }
2440 // Draw arrow button outline 2477 // Draw arrow button outline
2441 p.setPen(colors.outline); 2478 p.setPen(colors.outline);
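
The outline loop above generates all four arms of the D-pad cross from the "up" arrow alone: (x, y) -> (y, x), (-x, -y), and (-y, -x) are reflections across the diagonals and through the origin, and two of the segments are written in reversed index order so the stitched outline keeps a consistent winding. The four mappings, in isolation:

    #include <array>

    struct PointF {
        float x;
        float y;
    };

    // The four symmetric copies of a point used to stitch the outline above.
    constexpr std::array<PointF, 4> SymmetricCopies(float x, float y) {
        return {{{x, y},      // original arm
                 {y, x},      // reflected across the x == y diagonal
                 {-x, -y},    // point-reflected through the center
                 {-y, -x}}};  // reflected across the x == -y diagonal
    }
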
@@ -2449,22 +2486,21 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
2449 QPoint offset; 2486 QPoint offset;
2450 2487
2451 for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) { 2488 for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) {
2489 const float up_arrow_x = up_arrow_button[point * 2 + 0];
2490 const float up_arrow_y = up_arrow_button[point * 2 + 1];
2491
2452 switch (direction) { 2492 switch (direction) {
2453 case Direction::Up: 2493 case Direction::Up:
2454 arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size, 2494 arrow_button[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
2455 up_arrow_button[point * 2 + 1] * size);
2456 break; 2495 break;
2457 case Direction::Left: 2496 case Direction::Left:
2458 arrow_button[point] = center + QPointF(up_arrow_button[point * 2 + 1] * size, 2497 arrow_button[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
2459 up_arrow_button[point * 2] * size);
2460 break; 2498 break;
2461 case Direction::Right: 2499 case Direction::Right:
2462 arrow_button[point] = center + QPointF(-up_arrow_button[point * 2 + 1] * size, 2500 arrow_button[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
2463 up_arrow_button[point * 2] * size);
2464 break; 2501 break;
2465 case Direction::Down: 2502 case Direction::Down:
2466 arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size, 2503 arrow_button[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
2467 -up_arrow_button[point * 2 + 1] * size);
2468 break; 2504 break;
2469 case Direction::None: 2505 case Direction::None:
2470 break; 2506 break;
@@ -2503,17 +2539,17 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
2503void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center, 2539void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center,
2504 const Direction direction, bool pressed) { 2540 const Direction direction, bool pressed) {
2505 std::array<QPointF, trigger_button.size() / 2> qtrigger_button; 2541 std::array<QPointF, trigger_button.size() / 2> qtrigger_button;
2506 QPoint offset;
2507 2542
2508 for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) { 2543 for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) {
2544 const float trigger_button_x = trigger_button[point * 2 + 0];
2545 const float trigger_button_y = trigger_button[point * 2 + 1];
2546
2509 switch (direction) { 2547 switch (direction) {
2510 case Direction::Left: 2548 case Direction::Left:
2511 qtrigger_button[point] = 2549 qtrigger_button[point] = center + QPointF(-trigger_button_x, trigger_button_y);
2512 center + QPointF(-trigger_button[point * 2], trigger_button[point * 2 + 1]);
2513 break; 2550 break;
2514 case Direction::Right: 2551 case Direction::Right:
2515 qtrigger_button[point] = 2552 qtrigger_button[point] = center + QPointF(trigger_button_x, trigger_button_y);
2516 center + QPointF(trigger_button[point * 2], trigger_button[point * 2 + 1]);
2517 break; 2553 break;
2518 case Direction::Up: 2554 case Direction::Up:
2519 case Direction::Down: 2555 case Direction::Down:
@@ -2636,22 +2672,21 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di
2636 std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol; 2672 std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol;
2637 2673
2638 for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) { 2674 for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) {
2675 const float up_arrow_x = up_arrow_symbol[point * 2 + 0];
2676 const float up_arrow_y = up_arrow_symbol[point * 2 + 1];
2677
2639 switch (direction) { 2678 switch (direction) {
2640 case Direction::Up: 2679 case Direction::Up:
2641 arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size, 2680 arrow_symbol[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
2642 up_arrow_symbol[point * 2 + 1] * size);
2643 break; 2681 break;
2644 case Direction::Left: 2682 case Direction::Left:
2645 arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2 + 1] * size, 2683 arrow_symbol[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
2646 up_arrow_symbol[point * 2] * size);
2647 break; 2684 break;
2648 case Direction::Right: 2685 case Direction::Right:
2649 arrow_symbol[point] = center + QPointF(-up_arrow_symbol[point * 2 + 1] * size, 2686 arrow_symbol[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
2650 up_arrow_symbol[point * 2] * size);
2651 break; 2687 break;
2652 case Direction::Down: 2688 case Direction::Down:
2653 arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size, 2689 arrow_symbol[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
2654 -up_arrow_symbol[point * 2 + 1] * size);
2655 break; 2690 break;
2656 case Direction::None: 2691 case Direction::None:
2657 break; 2692 break;
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h
index 39565f795..91c3343f1 100644
--- a/src/yuzu/configuration/configure_input_player_widget.h
+++ b/src/yuzu/configuration/configure_input_player_widget.h
@@ -25,7 +25,7 @@ public:
25 25
26 void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param, 26 void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param,
27 const AnalogParam& analogs_param); 27 const AnalogParam& analogs_param);
28 void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_, 28 void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw& buttons_,
29 Settings::AnalogsRaw analogs_); 29 Settings::AnalogsRaw analogs_);
30 void SetConnectedStatus(bool checked); 30 void SetConnectedStatus(bool checked);
31 void SetControllerType(Settings::ControllerType type); 31 void SetControllerType(Settings::ControllerType type);
@@ -138,9 +138,9 @@ private:
138 // Draw joystick functions 138 // Draw joystick functions
139 void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed); 139 void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed);
140 void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed); 140 void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed);
141 void DrawRawJoystick(QPainter& p, QPointF center, const QPointF value, 141 void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
142 const Input::AnalogProperties properties); 142 const Input::AnalogProperties& properties);
143 void DrawProJoystick(QPainter& p, QPointF center, bool pressed); 143 void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed);
144 void DrawGCJoystick(QPainter& p, QPointF center, bool pressed); 144 void DrawGCJoystick(QPainter& p, QPointF center, bool pressed);
145 145
146 // Draw button functions 146 // Draw button functions
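
A small but recurring cleanup in this header: aggregate parameters such as Settings::ButtonsRaw and Input::AnalogProperties are now taken by const reference instead of by value, avoiding a copy on every repaint. The general shape of the change, taken from the DrawRawJoystick declaration above:

    // Before: the struct is copied on each call.
    void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
                         const Input::AnalogProperties properties);

    // After: the caller's object is read in place.
    void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
                         const Input::AnalogProperties& properties);
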
diff --git a/src/yuzu/debugger/controller.cpp b/src/yuzu/debugger/controller.cpp
index 85724a8f3..2731d948d 100644
--- a/src/yuzu/debugger/controller.cpp
+++ b/src/yuzu/debugger/controller.cpp
@@ -42,7 +42,7 @@ void ControllerDialog::refreshConfiguration() {
42 42
43QAction* ControllerDialog::toggleViewAction() { 43QAction* ControllerDialog::toggleViewAction() {
44 if (toggle_view_action == nullptr) { 44 if (toggle_view_action == nullptr) {
45 toggle_view_action = new QAction(windowTitle(), this); 45 toggle_view_action = new QAction(tr("&Controller P1"), this);
46 toggle_view_action->setCheckable(true); 46 toggle_view_action->setCheckable(true);
47 toggle_view_action->setChecked(isVisible()); 47 toggle_view_action->setChecked(isVisible());
48 connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible); 48 connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ef92c25bc..0ba7c07cc 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -850,6 +850,16 @@ void GMainWindow::InitializeHotkeys() {
850 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this), 850 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
851 &QShortcut::activated, this, 851 &QShortcut::activated, this,
852 [] { Settings::values.audio_muted = !Settings::values.audio_muted; }); 852 [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
853
854 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
855 &QShortcut::activated, this, [&] {
856 Settings::values.mouse_panning = !Settings::values.mouse_panning;
857 if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
858 mouse_hide_timer.start();
859 render_window->installEventFilter(render_window);
860 render_window->setAttribute(Qt::WA_Hover, true);
861 }
862 });
853} 863}
854 864
855void GMainWindow::SetDefaultUIGeometry() { 865void GMainWindow::SetDefaultUIGeometry() {
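
The new hunk binds a "Toggle Mouse Panning" shortcut to a lambda that flips the setting and re-arms the cursor-hide machinery. One detail of the pattern worth noting: passing this as the third argument makes the window the connection's context object, so Qt drops the connection automatically if the window is destroyed before the shortcut fires. Stripped to its core (shortcut setup assumed):

    // QShortcut-to-lambda pattern; `this` scopes the connection's lifetime
    // to the window object.
    connect(shortcut, &QShortcut::activated, this, [] {
        Settings::values.mouse_panning = !Settings::values.mouse_panning;
    });
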
@@ -1197,7 +1207,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
1197 multicore_status_button->setDisabled(true); 1207 multicore_status_button->setDisabled(true);
1198 renderer_status_button->setDisabled(true); 1208 renderer_status_button->setDisabled(true);
1199 1209
1200 if (UISettings::values.hide_mouse) { 1210 if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
1201 mouse_hide_timer.start(); 1211 mouse_hide_timer.start();
1202 render_window->installEventFilter(render_window); 1212 render_window->installEventFilter(render_window);
1203 render_window->setAttribute(Qt::WA_Hover, true); 1213 render_window->setAttribute(Qt::WA_Hover, true);
@@ -2359,7 +2369,7 @@ void GMainWindow::OnConfigure() {
2359 2369
2360 config->Save(); 2370 config->Save();
2361 2371
2362 if (UISettings::values.hide_mouse && emulation_running) { 2372 if ((UISettings::values.hide_mouse || Settings::values.mouse_panning) && emulation_running) {
2363 render_window->installEventFilter(render_window); 2373 render_window->installEventFilter(render_window);
2364 render_window->setAttribute(Qt::WA_Hover, true); 2374 render_window->setAttribute(Qt::WA_Hover, true);
2365 mouse_hide_timer.start(); 2375 mouse_hide_timer.start();
@@ -2480,6 +2490,11 @@ void GMainWindow::OnCaptureScreenshot() {
2480 .arg(title_id, 16, 16, QLatin1Char{'0'}) 2490 .arg(title_id, 16, 16, QLatin1Char{'0'})
2481 .arg(date); 2491 .arg(date);
2482 2492
2493 if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
2494 OnStartGame();
2495 return;
2496 }
2497
2483#ifdef _WIN32 2498#ifdef _WIN32
2484 if (UISettings::values.enable_screenshot_save_as) { 2499 if (UISettings::values.enable_screenshot_save_as) {
2485 filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename, 2500 filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
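
OnCaptureScreenshot now creates the screenshot directory up front and, if that fails, resumes emulation via OnStartGame instead of proceeding to a write that would fail. The same guard expressed with the standard library, purely as a sketch (yuzu uses its own Common::FS wrapper; std::filesystem here is only illustrative):

    #include <filesystem>
    #include <system_error>

    // Returns true if the directory exists or was just created.
    bool EnsureDir(const std::filesystem::path& dir) {
        std::error_code ec;
        std::filesystem::create_directories(dir, ec); // no-op if already present
        return !ec && std::filesystem::is_directory(dir, ec);
    }
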
@@ -2600,7 +2615,8 @@ void GMainWindow::UpdateUISettings() {
2600} 2615}
2601 2616
2602void GMainWindow::HideMouseCursor() { 2617void GMainWindow::HideMouseCursor() {
2603 if (emu_thread == nullptr || UISettings::values.hide_mouse == false) { 2618 if (emu_thread == nullptr ||
2619 (!UISettings::values.hide_mouse && !Settings::values.mouse_panning)) {
2604 mouse_hide_timer.stop(); 2620 mouse_hide_timer.stop();
2605 ShowMouseCursor(); 2621 ShowMouseCursor();
2606 return; 2622 return;
@@ -2610,13 +2626,16 @@ void GMainWindow::HideMouseCursor() {
2610 2626
2611void GMainWindow::ShowMouseCursor() { 2627void GMainWindow::ShowMouseCursor() {
2612 render_window->unsetCursor(); 2628 render_window->unsetCursor();
2613 if (emu_thread != nullptr && UISettings::values.hide_mouse) { 2629 if (emu_thread != nullptr &&
2630 (UISettings::values.hide_mouse || Settings::values.mouse_panning)) {
2614 mouse_hide_timer.start(); 2631 mouse_hide_timer.start();
2615 } 2632 }
2616} 2633}
2617 2634
2618void GMainWindow::OnMouseActivity() { 2635void GMainWindow::OnMouseActivity() {
2619 ShowMouseCursor(); 2636 if (!Settings::values.mouse_panning) {
2637 ShowMouseCursor();
2638 }
2620} 2639}
2621 2640
2622void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) { 2641void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) {
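
The predicate "hide_mouse or mouse_panning" now appears in five places across main.cpp (boot, configure, hide, show, and the new hotkey). A hypothetical helper, not part of the patch, would keep them in sync:

    bool GMainWindow::ShouldHideMouse() const {
        // Cursor hiding applies when the user opted in, or is implied by
        // mouse panning, where a visible cursor defeats relative capture.
        return UISettings::values.hide_mouse || Settings::values.mouse_panning;
    }

OnMouseActivity also skips ShowMouseCursor entirely while panning is active, so the cursor stays hidden even as the mouse moves.
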
@@ -2751,7 +2770,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
2751 .arg(errors)); 2770 .arg(errors));
2752 } 2771 }
2753 2772
2754 QProgressDialog prog; 2773 QProgressDialog prog(this);
2755 prog.setRange(0, 0); 2774 prog.setRange(0, 0);
2756 prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your " 2775 prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your "
2757 "system's performance.")); 2776 "system's performance."));
@@ -2933,7 +2952,7 @@ void GMainWindow::filterBarSetChecked(bool state) {
2933} 2952}
2934 2953
2935void GMainWindow::UpdateUITheme() { 2954void GMainWindow::UpdateUITheme() {
2936 const QString default_icons = QStringLiteral(":/icons/default"); 2955 const QString default_icons = QStringLiteral("default");
2937 const QString& current_theme = UISettings::values.theme; 2956 const QString& current_theme = UISettings::values.theme;
2938 const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second); 2957 const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second);
2939 QStringList theme_paths(default_theme_paths); 2958 QStringList theme_paths(default_theme_paths);
@@ -2949,7 +2968,6 @@ void GMainWindow::UpdateUITheme() {
2949 qApp->setStyleSheet({}); 2968 qApp->setStyleSheet({});
2950 setStyleSheet({}); 2969 setStyleSheet({});
2951 } 2970 }
2952 theme_paths.append(default_icons);
2953 QIcon::setThemeName(default_icons); 2971 QIcon::setThemeName(default_icons);
2954 } else { 2972 } else {
2955 const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss")); 2973 const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss"));
@@ -2961,10 +2979,7 @@ void GMainWindow::UpdateUITheme() {
2961 } else { 2979 } else {
2962 LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found"); 2980 LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found");
2963 } 2981 }
2964 2982 QIcon::setThemeName(current_theme);
2965 const QString theme_name = QStringLiteral(":/icons/") + current_theme;
2966 theme_paths.append({default_icons, theme_name});
2967 QIcon::setThemeName(theme_name);
2968 } 2983 }
2969 2984
2970 QIcon::setThemeSearchPaths(theme_paths); 2985 QIcon::setThemeSearchPaths(theme_paths);
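
UpdateUITheme now sets the icon theme by bare name ("default" or the current theme's name) and lets QIcon::setThemeSearchPaths resolve it, instead of baking a ":/icons/..." prefix into the theme name. Under Qt's icon-theme mechanism the name is a directory, containing an index.theme, under one of the search paths. A minimal lookup, assuming such a layout is compiled into the resources:

    QIcon::setThemeSearchPaths({QStringLiteral(":/icons")});
    QIcon::setThemeName(QStringLiteral("default"));

    // Resolved against index.theme under :/icons/default/ (assumed layout).
    const QIcon icon = QIcon::fromTheme(QStringLiteral("plus"));
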
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index e2ad5baf6..048870687 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -14,8 +14,8 @@
14 <string>yuzu</string> 14 <string>yuzu</string>
15 </property> 15 </property>
16 <property name="windowIcon"> 16 <property name="windowIcon">
17 <iconset> 17 <iconset resource="yuzu.qrc">
18 <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset> 18 <normaloff>:/img/yuzu.ico</normaloff>:/img/yuzu.ico</iconset>
19 </property> 19 </property>
20 <property name="tabShape"> 20 <property name="tabShape">
21 <enum>QTabWidget::Rounded</enum> 21 <enum>QTabWidget::Rounded</enum>
@@ -303,6 +303,8 @@
303 </property> 303 </property>
304 </action> 304 </action>
305 </widget> 305 </widget>
306 <resources/> 306 <resources>
307 <include location="yuzu.qrc"/>
308 </resources>
307 <connections/> 309 <connections/>
308</ui> 310</ui>
diff --git a/src/yuzu/yuzu.qrc b/src/yuzu/yuzu.qrc
new file mode 100644
index 000000000..5733cac98
--- /dev/null
+++ b/src/yuzu/yuzu.qrc
@@ -0,0 +1,5 @@
1<RCC>
2 <qresource prefix="/img">
3 <file alias="yuzu.ico">../../dist/yuzu.ico</file>
4 </qresource>
5</RCC>
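
The new yuzu.qrc maps ../../dist/yuzu.ico to the resource path :/img/yuzu.ico through its prefix and alias, which is exactly what main.ui's windowIcon now references; the icon therefore ships inside the binary instead of being resolved relative to the working directory. The same resource is equally reachable from code, illustratively:

    // Anywhere after the resource file is compiled into the binary:
    window->setWindowIcon(QIcon(QStringLiteral(":/img/yuzu.ico")));
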
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 0b3f2cb54..8461f8896 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -1,5 +1,15 @@
1set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) 1set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
2 2
3function(create_resource file output filename)
4 # Read hex data from file
5 file(READ ${file} filedata HEX)
6 # Convert hex data for C compatibility
7 string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
8 # Write data to output file
9 set(RESOURCES_DIR "${PROJECT_BINARY_DIR}/dist" PARENT_SCOPE)
10 file(WRITE "${PROJECT_BINARY_DIR}/dist/${output}" "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
11endfunction()
12
3add_executable(yuzu-cmd 13add_executable(yuzu-cmd
4 config.cpp 14 config.cpp
5 config.h 15 config.h
@@ -24,6 +34,9 @@ if (MSVC)
24endif() 34endif()
25target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads) 35target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
26 36
37create_resource("../../dist/yuzu.bmp" "yuzu_cmd/yuzu_icon.h" "yuzu_icon")
38target_include_directories(yuzu-cmd PRIVATE ${RESOURCES_DIR})
39
27target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include) 40target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
28 41
29if(UNIX AND NOT APPLE) 42if(UNIX AND NOT APPLE)
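
create_resource is a CMake stand-in for xxd -i: it reads a binary file as hex, rewrites each byte as a 0xNN, literal, and emits a C array plus a size constant, here turning dist/yuzu.bmp into a header included as yuzu_cmd/yuzu_icon.h. For a hypothetical input whose first bytes are the BMP magic, the generated header would look like:

    const unsigned char yuzu_icon[] = {0x42,0x4d,0x36,0x84,};
    const unsigned yuzu_icon_size = sizeof(yuzu_icon);

The trailing comma in the initializer is valid C and C++, which is what lets the REGEX REPLACE append one uniformly after every byte.
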
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f76102459..aa0a9f288 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -388,7 +388,7 @@ void Config::ReadValues() {
388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100))); 388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
389 Settings::values.use_disk_shader_cache.SetValue( 389 Settings::values.use_disk_shader_cache.SetValue(
390 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); 390 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
391 const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); 391 const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
392 Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); 392 Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
393 Settings::values.use_asynchronous_gpu_emulation.SetValue( 393 Settings::values.use_asynchronous_gpu_emulation.SetValue(
394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); 394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7843d5167..7e391ab89 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -12,6 +12,7 @@
12#include "input_common/mouse/mouse_input.h" 12#include "input_common/mouse/mouse_input.h"
13#include "input_common/sdl/sdl.h" 13#include "input_common/sdl/sdl.h"
14#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 14#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
15#include "yuzu_cmd/yuzu_icon.h"
15 16
16EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_) 17EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_)
17 : input_subsystem{input_subsystem_} { 18 : input_subsystem{input_subsystem_} {
@@ -30,7 +31,8 @@ EmuWindow_SDL2::~EmuWindow_SDL2() {
30 31
31void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) { 32void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
32 TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0); 33 TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
33 input_subsystem->GetMouse()->MouseMove(x, y); 34
35 input_subsystem->GetMouse()->MouseMove(x, y, 0, 0);
34} 36}
35 37
36void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) { 38void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) {
@@ -193,6 +195,22 @@ void EmuWindow_SDL2::WaitEvent() {
193 } 195 }
194} 196}
195 197
198void EmuWindow_SDL2::SetWindowIcon() {
199 SDL_RWops* const yuzu_icon_stream = SDL_RWFromConstMem((void*)yuzu_icon, yuzu_icon_size);
200 if (yuzu_icon_stream == nullptr) {
201 LOG_WARNING(Frontend, "Failed to create yuzu icon stream.");
202 return;
203 }
204 SDL_Surface* const window_icon = SDL_LoadBMP_RW(yuzu_icon_stream, 1);
205 if (window_icon == nullptr) {
206 LOG_WARNING(Frontend, "Failed to read BMP from stream.");
207 return;
208 }
209 // The icon is attached to the window pointer
210 SDL_SetWindowIcon(render_window, window_icon);
211 SDL_FreeSurface(window_icon);
212}
213
196void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) { 214void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) {
197 SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second); 215 SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
198} 216}
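
SetWindowIcon feeds the embedded BMP to SDL through a memory stream: SDL_RWFromConstMem wraps the byte array, SDL_LoadBMP_RW decodes it (the second argument, 1, tells SDL to close the stream itself), and SDL_SetWindowIcon copies the surface into the window, after which the local surface can be freed. Reduced to the essentials, with the null checks from the function above omitted:

    // Icon-from-memory flow (SDL2); error handling elided for brevity.
    SDL_RWops* const stream = SDL_RWFromConstMem(yuzu_icon, yuzu_icon_size);
    SDL_Surface* const icon = SDL_LoadBMP_RW(stream, 1); // 1: SDL frees the stream
    if (icon != nullptr) {
        SDL_SetWindowIcon(render_window, icon); // the window keeps its own copy
        SDL_FreeSurface(icon);
    }
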
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index a93141240..51a12a6a9 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -32,6 +32,9 @@ public:
32 /// Wait for the next event on the main thread. 32 /// Wait for the next event on the main thread.
33 void WaitEvent(); 33 void WaitEvent();
34 34
35 // Sets the window icon from yuzu.bmp
36 void SetWindowIcon();
37
35protected: 38protected:
36 /// Called by WaitEvent when a key is pressed or released. 39 /// Called by WaitEvent when a key is pressed or released.
37 void OnKeyEvent(int key, u8 state); 40 void OnKeyEvent(int key, u8 state);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index deddea9ee..a02485c14 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -107,6 +107,8 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste
107 dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0, 107 dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
108 SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL); 108 SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
109 109
110 SetWindowIcon();
111
110 if (fullscreen) { 112 if (fullscreen) {
111 Fullscreen(); 113 Fullscreen();
112 } 114 }
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 3ba657c00..6f9b00461 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -35,6 +35,8 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
35 std::exit(EXIT_FAILURE); 35 std::exit(EXIT_FAILURE);
36 } 36 }
37 37
38 SetWindowIcon();
39
38 switch (wm.subsystem) { 40 switch (wm.subsystem) {
39#ifdef SDL_VIDEO_DRIVER_WINDOWS 41#ifdef SDL_VIDEO_DRIVER_WINDOWS
40 case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS: 42 case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 0e1f3bdb3..982c41785 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
215 // Core is loaded, start the GPU (makes the GPU contexts current to this thread) 215 // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
216 system.GPU().Start(); 216 system.GPU().Start();
217 217
218 system.Renderer().Rasterizer().LoadDiskResources( 218 system.Renderer().ReadRasterizer()->LoadDiskResources(
219 system.CurrentProcess()->GetTitleID(), false, 219 system.CurrentProcess()->GetTitleID(), false,
220 [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); 220 [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});
221 221