m--------- externals/dynarmic | 0
m--------- externals/sirit | 0
m--------- externals/xbyak | 0
-rw-r--r-- src/audio_core/renderer/command/resample/upsample.cpp | 97
-rw-r--r-- src/common/input.h | 3
-rw-r--r-- src/common/settings.cpp | 14
-rw-r--r-- src/common/settings.h | 16
-rw-r--r-- src/core/CMakeLists.txt | 1
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 4
-rw-r--r-- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 4
-rw-r--r-- src/core/core_timing.cpp | 42
-rw-r--r-- src/core/core_timing.h | 9
-rw-r--r-- src/core/hid/emulated_controller.cpp | 10
-rw-r--r-- src/core/hle/kernel/k_code_memory.cpp | 29
-rw-r--r-- src/core/hle/kernel/k_code_memory.h | 6
-rw-r--r-- src/core/hle/kernel/k_hardware_timer.cpp | 6
-rw-r--r-- src/core/hle/kernel/k_memory_manager.cpp | 8
-rw-r--r-- src/core/hle/kernel/k_page_group.cpp | 121
-rw-r--r-- src/core/hle/kernel/k_page_group.h | 163
-rw-r--r-- src/core/hle/kernel/k_page_table.cpp | 142
-rw-r--r-- src/core/hle/kernel/k_page_table.h | 9
-rw-r--r-- src/core/hle/kernel/k_shared_memory.cpp | 19
-rw-r--r-- src/core/hle/kernel/memory_types.h | 3
-rw-r--r-- src/core/hle/kernel/svc.cpp | 2
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.cpp | 8
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.h | 2
-rw-r--r-- src/core/internal_network/network.cpp | 6
-rw-r--r-- src/core/memory.cpp | 6
-rw-r--r-- src/input_common/drivers/tas_input.cpp | 12
-rw-r--r-- src/input_common/drivers/tas_input.h | 2
-rw-r--r-- src/input_common/helpers/stick_from_buttons.cpp | 17
-rw-r--r-- src/input_common/input_mapping.cpp | 2
-rw-r--r-- src/input_common/main.cpp | 26
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 8
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 58
-rw-r--r-- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 45
-rw-r--r-- src/shader_recompiler/backend/spirv/spirv_emit_context.h | 1
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 116
-rw-r--r-- src/shader_recompiler/host_translate_info.h | 3
-rw-r--r-- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 13
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h | 2
-rw-r--r-- src/shader_recompiler/profile.h | 2
-rw-r--r-- src/shader_recompiler/shader_info.h | 12
-rw-r--r-- src/tests/video_core/buffer_base.cpp | 2
-rw-r--r-- src/video_core/CMakeLists.txt | 3
-rw-r--r-- src/video_core/buffer_cache/buffer_base.h | 14
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 13
-rw-r--r-- src/video_core/engines/engine_upload.cpp | 2
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 6
-rw-r--r-- src/video_core/engines/fermi_2d.h | 1
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 9
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 21
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt | 1
-rw-r--r-- src/video_core/host_shaders/vulkan_turbo_mode.comp | 29
-rw-r--r-- src/video_core/invalidation_accumulator.h | 79
-rw-r--r-- src/video_core/macro/macro_hle.cpp | 42
-rw-r--r-- src/video_core/memory_manager.cpp | 102
-rw-r--r-- src/video_core/memory_manager.h | 18
-rw-r--r-- src/video_core/rasterizer_interface.h | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 8
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 2
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 29
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h | 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 15
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 42
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 68
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 19
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 132
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 25
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h | 1
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp | 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.h | 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_turbo_mode.cpp | 222
-rw-r--r-- src/video_core/renderer_vulkan/vk_turbo_mode.h | 35
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.cpp | 1401
-rw-r--r-- src/video_core/vulkan_common/vulkan_device.h | 445
-rw-r--r-- src/video_core/vulkan_common/vulkan_instance.cpp | 14
-rw-r--r-- src/video_core/vulkan_common/vulkan_instance.h | 5
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.cpp | 37
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.h | 36
-rw-r--r-- src/yuzu/Info.plist | 2
-rw-r--r-- src/yuzu/bootmanager.cpp | 19
-rw-r--r-- src/yuzu/bootmanager.h | 4
-rw-r--r-- src/yuzu/configuration/config.cpp | 6
-rw-r--r-- src/yuzu/configuration/configure_graphics.ui | 15
-rw-r--r-- src/yuzu/configuration/configure_graphics_advanced.cpp | 22
-rw-r--r-- src/yuzu/configuration/configure_graphics_advanced.h | 2
-rw-r--r-- src/yuzu/configuration/configure_graphics_advanced.ui | 20
-rw-r--r-- src/yuzu/configuration/configure_input_player.cpp | 6
-rw-r--r-- src/yuzu/configuration/configure_input_player.h | 3
-rw-r--r-- src/yuzu/main.cpp | 57
-rw-r--r-- src/yuzu/main.h | 5
-rw-r--r-- src/yuzu_cmd/config.cpp | 2
99 files changed, 2377 insertions, 1763 deletions
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject commit bd570e093ca1d1206961296b90df65cda7de8e8
+Subproject commit befe547d5631024a70d81d2ccee808bbfcb3854
diff --git a/externals/sirit b/externals/sirit
-Subproject commit d7ad93a88864bda94e282e95028f90b5784e4d2
+Subproject commit ab75463999f4f3291976b079d42d52ee91eebf3
diff --git a/externals/xbyak b/externals/xbyak
-Subproject commit 348e3e548ebac06d243e5881caec8440e249f65
+Subproject commit a1ac3750f9a639b5a6c6d6c7da4259b8d679098
diff --git a/src/audio_core/renderer/command/resample/upsample.cpp b/src/audio_core/renderer/command/resample/upsample.cpp
index 6c3ff31f7..5f7db12ca 100644
--- a/src/audio_core/renderer/command/resample/upsample.cpp
+++ b/src/audio_core/renderer/command/resample/upsample.cpp
@@ -20,25 +20,25 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
                             const u32 target_sample_count, const u32 source_sample_count,
                             UpsamplerState* state) {
     constexpr u32 WindowSize = 10;
-    constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow1{
-        51.93359375f, -18.80078125f, 9.73046875f, -5.33203125f, 2.84375f,
-        -1.41015625f, 0.62109375f, -0.2265625f, 0.0625f, -0.00390625f,
+    constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc1{
+        0.95376587f, -0.12872314f, 0.060028076f, -0.032470703f, 0.017669678f,
+        -0.009124756f, 0.004272461f, -0.001739502f, 0.000579834f, -0.000091552734f,
     };
-    constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow2{
-        105.35546875f, -24.52734375f, 11.9609375f, -6.515625f, 3.52734375f,
-        -1.796875f, 0.828125f, -0.32421875f, 0.1015625f, -0.015625f,
+    constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc2{
+        0.8230896f, -0.19161987f, 0.093444824f, -0.05090332f, 0.027557373f,
+        -0.014038086f, 0.0064697266f, -0.002532959f, 0.00079345703f, -0.00012207031f,
     };
-    constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow3{
-        122.08203125f, -16.47656250f, 7.68359375f, -4.15625000f, 2.26171875f,
-        -1.16796875f, 0.54687500f, -0.22265625f, 0.07421875f, -0.01171875f,
+    constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc3{
+        0.6298828f, -0.19274902f, 0.09725952f, -0.05319214f, 0.028625488f,
+        -0.014373779f, 0.006500244f, -0.0024719238f, 0.0007324219f, -0.000091552734f,
     };
-    constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow4{
-        23.73437500f, -9.62109375f, 5.07812500f, -2.78125000f, 1.46875000f,
-        -0.71484375f, 0.30859375f, -0.10546875f, 0.02734375f, 0.00000000f,
+    constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc4{
+        0.4057312f, -0.1468811f, 0.07601929f, -0.041656494f, 0.022216797f,
+        -0.011016846f, 0.004852295f, -0.0017700195f, 0.00048828125f, -0.000030517578f,
     };
-    constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow5{
-        80.62500000f, -24.67187500f, 12.44921875f, -6.80859375f, 3.66406250f,
-        -1.83984375f, 0.83203125f, -0.31640625f, 0.09375000f, -0.01171875f,
+    constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc5{
+        0.1854248f, -0.075164795f, 0.03967285f, -0.021728516f, 0.011474609f,
+        -0.005584717f, 0.0024108887f, -0.0008239746f, 0.00021362305f, 0.0f,
     };
 
     if (!state->initialized) {
@@ -91,52 +91,31 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
         static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize);
     };
 
-    auto calculate_sample = [&state](std::span<const Common::FixedPoint<24, 8>> coeffs1,
-                                     std::span<const Common::FixedPoint<24, 8>> coeffs2) -> s32 {
+    auto calculate_sample = [&state](std::span<const Common::FixedPoint<17, 15>> coeffs1,
+                                     std::span<const Common::FixedPoint<17, 15>> coeffs2) -> s32 {
         auto output_index{state->history_output_index};
-        auto start_pos{output_index - state->history_start_index + 1U};
-        auto end_pos{10U};
+        u64 result{0};
 
-        if (start_pos < 10) {
-            end_pos = start_pos;
-        }
-
-        u64 prev_contrib{0};
-        u32 coeff_index{0};
-        for (; coeff_index < end_pos; coeff_index++, output_index--) {
-            prev_contrib += static_cast<u64>(state->history[output_index].to_raw()) *
-                            coeffs1[coeff_index].to_raw();
-        }
+        for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) {
+            result += static_cast<u64>(state->history[output_index].to_raw()) *
+                      coeffs1[coeff_index].to_raw();
 
-        auto end_index{state->history_end_index};
-        for (; start_pos < 9; start_pos++, coeff_index++, end_index--) {
-            prev_contrib += static_cast<u64>(state->history[end_index].to_raw()) *
-                            coeffs1[coeff_index].to_raw();
+            output_index = output_index == state->history_start_index ? state->history_end_index
                                                                       : output_index - 1;
         }
 
         output_index =
             static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize);
-        start_pos = state->history_end_index - output_index + 1U;
-        end_pos = 10U;
 
-        if (start_pos < 10) {
-            end_pos = start_pos;
-        }
-
-        u64 next_contrib{0};
-        coeff_index = 0;
-        for (; coeff_index < end_pos; coeff_index++, output_index++) {
-            next_contrib += static_cast<u64>(state->history[output_index].to_raw()) *
-                            coeffs2[coeff_index].to_raw();
-        }
+        for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) {
+            result += static_cast<u64>(state->history[output_index].to_raw()) *
+                      coeffs2[coeff_index].to_raw();
 
-        auto start_index{state->history_start_index};
-        for (; start_pos < 9; start_pos++, start_index++, coeff_index++) {
-            next_contrib += static_cast<u64>(state->history[start_index].to_raw()) *
-                            coeffs2[coeff_index].to_raw();
+            output_index = output_index == state->history_end_index ? state->history_start_index
                                                                     : output_index + 1;
         }
 
-        return static_cast<s32>(((prev_contrib >> 15) + (next_contrib >> 15)) >> 8);
+        return static_cast<s32>(result >> (8 + 15));
     };
 
     switch (state->ratio.to_int_floor()) {
@@ -150,23 +129,23 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
         break;
 
     case 1:
-        output[write_index] = calculate_sample(SincWindow3, SincWindow4);
+        output[write_index] = calculate_sample(WindowedSinc1, WindowedSinc5);
         break;
 
     case 2:
-        output[write_index] = calculate_sample(SincWindow2, SincWindow1);
+        output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
        break;
 
    case 3:
-        output[write_index] = calculate_sample(SincWindow5, SincWindow5);
+        output[write_index] = calculate_sample(WindowedSinc3, WindowedSinc3);
         break;
 
     case 4:
-        output[write_index] = calculate_sample(SincWindow1, SincWindow2);
+        output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
         break;
 
     case 5:
-        output[write_index] = calculate_sample(SincWindow4, SincWindow3);
+        output[write_index] = calculate_sample(WindowedSinc5, WindowedSinc1);
         break;
     }
     state->sample_index = static_cast<u8>((state->sample_index + 1) % 6);
@@ -183,11 +162,11 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
         break;
 
     case 1:
-        output[write_index] = calculate_sample(SincWindow2, SincWindow1);
+        output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
         break;
 
     case 2:
-        output[write_index] = calculate_sample(SincWindow1, SincWindow2);
+        output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
         break;
     }
     state->sample_index = static_cast<u8>((state->sample_index + 1) % 3);
@@ -204,12 +183,12 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
         break;
 
     case 1:
-        output[write_index] = calculate_sample(SincWindow1, SincWindow2);
+        output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
         break;
 
     case 2:
         increment();
-        output[write_index] = calculate_sample(SincWindow2, SincWindow1);
+        output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
         break;
     }
     state->sample_index = static_cast<u8>((state->sample_index + 1) % 3);
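The rewritten calculate_sample collapses the four edge-case loops of the old implementation into one u64 accumulator that walks the history ring buffer with explicit wrap-around, first backwards over coeffs1 and then forwards over coeffs2, and removes the Q8 sample scale and the Q15 coefficient scale in a single final shift. A minimal self-contained sketch of that scheme, with illustrative names and a 32-entry buffer standing in for UpsamplerState::HistorySize:

    #include <array>
    #include <cstdint>

    struct RingState {
        std::array<int32_t, 32> history_raw{}; // raw Q24.8 samples
        uint16_t start = 0;   // oldest valid slot
        uint16_t end = 31;    // newest valid slot
        uint16_t output = 15; // current output position
    };

    int32_t CalculateSample(const RingState& s, const std::array<int32_t, 10>& c1_raw,
                            const std::array<int32_t, 10>& c2_raw) { // raw Q17.15 coefficients
        uint64_t result = 0;
        uint16_t idx = s.output;
        for (int i = 0; i < 10; ++i) { // first window: walk backwards, wrapping at start
            result += static_cast<uint64_t>(s.history_raw[idx]) * c1_raw[i];
            idx = (idx == s.start) ? s.end : static_cast<uint16_t>(idx - 1);
        }
        idx = static_cast<uint16_t>((s.output + 1) % s.history_raw.size());
        for (int i = 0; i < 10; ++i) { // second window: walk forwards, wrapping at end
            result += static_cast<uint64_t>(s.history_raw[idx]) * c2_raw[i];
            idx = (idx == s.end) ? s.start : static_cast<uint16_t>(idx + 1);
        }
        // Negative products wrap modulo 2^64 and cancel in the sum, exactly as in
        // the diff above; the final shift drops the Q8 and Q15 scales together.
        return static_cast<int32_t>(result >> (8 + 15));
    }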
diff --git a/src/common/input.h b/src/common/input.h
index fc14fd7bf..d27b1d772 100644
--- a/src/common/input.h
+++ b/src/common/input.h
@@ -292,9 +292,6 @@ class InputDevice {
 public:
     virtual ~InputDevice() = default;
 
-    // Request input device to update if necessary
-    virtual void SoftUpdate() {}
-
     // Force input device to update data regardless of the current state
     virtual void ForceUpdate() {}
 
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 149e621f9..b1a2aa8b2 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -129,6 +129,10 @@ void UpdateRescalingInfo() {
         info.up_scale = 1;
         info.down_shift = 0;
         break;
+    case ResolutionSetup::Res3_2X:
+        info.up_scale = 3;
+        info.down_shift = 1;
+        break;
     case ResolutionSetup::Res2X:
         info.up_scale = 2;
         info.down_shift = 0;
@@ -149,6 +153,14 @@
         info.up_scale = 6;
         info.down_shift = 0;
         break;
+    case ResolutionSetup::Res7X:
+        info.up_scale = 7;
+        info.down_shift = 0;
+        break;
+    case ResolutionSetup::Res8X:
+        info.up_scale = 8;
+        info.down_shift = 0;
+        break;
     default:
         ASSERT(false);
         info.up_scale = 1;
@@ -185,6 +197,7 @@ void RestoreGlobalState(bool is_powered_on) {
     // Renderer
     values.fsr_sharpening_slider.SetGlobal(true);
     values.renderer_backend.SetGlobal(true);
+    values.renderer_force_max_clock.SetGlobal(true);
     values.vulkan_device.SetGlobal(true);
     values.aspect_ratio.SetGlobal(true);
     values.max_anisotropy.SetGlobal(true);
@@ -200,6 +213,7 @@
     values.use_asynchronous_shaders.SetGlobal(true);
     values.use_fast_gpu_time.SetGlobal(true);
     values.use_pessimistic_flushes.SetGlobal(true);
+    values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
     values.bg_red.SetGlobal(true);
     values.bg_green.SetGlobal(true);
     values.bg_blue.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 5017951c5..80b2eeabc 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -56,11 +56,14 @@ enum class ResolutionSetup : u32 {
     Res1_2X = 0,
     Res3_4X = 1,
     Res1X = 2,
-    Res2X = 3,
-    Res3X = 4,
-    Res4X = 5,
-    Res5X = 6,
-    Res6X = 7,
+    Res3_2X = 3,
+    Res2X = 4,
+    Res3X = 5,
+    Res4X = 6,
+    Res5X = 7,
+    Res6X = 8,
+    Res7X = 9,
+    Res8X = 10,
 };
 
 enum class ScalingFilter : u32 {
@@ -415,6 +418,7 @@ struct Values {
     // Renderer
     SwitchableSetting<RendererBackend, true> renderer_backend{
         RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
+    SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
     Setting<bool> renderer_debug{false, "debug"};
     Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
     Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
@@ -451,6 +455,8 @@ struct Values {
     SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
     SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
     SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
+    SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
+                                                             "use_vulkan_driver_pipeline_cache"};
 
     SwitchableSetting<u8> bg_red{0, "bg_red"};
     SwitchableSetting<u8> bg_green{0, "bg_green"};
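For reference, the rescaler consumes the two fields set in UpdateRescalingInfo() as a fixed-point multiplier, scaled = (value * up_scale) >> down_shift, which is how the new fractional Res3_2X entry expresses 1.5x without floating point. A sketch of the arithmetic (the Scale helper is illustrative, not yuzu API):

    #include <cstdint>

    constexpr uint32_t Scale(uint32_t dimension, uint32_t up_scale, uint32_t down_shift) {
        return (dimension * up_scale) >> down_shift;
    }

    static_assert(Scale(720, 3, 1) == 1080); // Res3_2X: up_scale = 3, down_shift = 1
    static_assert(Scale(720, 2, 0) == 1440); // Res2X
    static_assert(Scale(720, 8, 0) == 5760); // Res8X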
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 0252c8c31..5afdeb5ff 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -226,6 +226,7 @@ add_library(core STATIC
     hle/kernel/k_page_buffer.h
     hle/kernel/k_page_heap.cpp
     hle/kernel/k_page_heap.h
+    hle/kernel/k_page_group.cpp
     hle/kernel/k_page_group.h
     hle/kernel/k_page_table.cpp
     hle/kernel/k_page_table.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 947747d36..2a7570073 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
     config.enable_cycle_counting = true;
 
     // Code cache size
+#ifdef ARCHITECTURE_arm64
+    config.code_cache_size = 128_MiB;
+#else
     config.code_cache_size = 512_MiB;
+#endif
 
     // Allow memory fault handling to work
     if (system.DebuggerEnabled()) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 3df943df7..7229fdc2a 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
     config.enable_cycle_counting = true;
 
     // Code cache size
+#ifdef ARCHITECTURE_arm64
+    config.code_cache_size = 128_MiB;
+#else
     config.code_cache_size = 512_MiB;
+#endif
 
     // Allow memory fault handling to work
     if (system.DebuggerEnabled()) {
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 0e7b5f943..6bac6722f 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -142,16 +142,24 @@ void CoreTiming::ScheduleLoopingEvent(std::chrono::nanoseconds start_time,
 }
 
 void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
-                                 std::uintptr_t user_data) {
-    std::scoped_lock scope{basic_lock};
-    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
-        return e.type.lock().get() == event_type.get() && e.user_data == user_data;
-    });
-
-    // Removing random items breaks the invariant so we have to re-establish it.
-    if (itr != event_queue.end()) {
-        event_queue.erase(itr, event_queue.end());
-        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+                                 std::uintptr_t user_data, bool wait) {
+    {
+        std::scoped_lock lk{basic_lock};
+        const auto itr =
+            std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+                return e.type.lock().get() == event_type.get() && e.user_data == user_data;
+            });
+
+        // Removing random items breaks the invariant so we have to re-establish it.
+        if (itr != event_queue.end()) {
+            event_queue.erase(itr, event_queue.end());
+            std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+        }
+    }
+
+    // Force any in-progress events to finish
+    if (wait) {
+        std::scoped_lock lk{advance_lock};
     }
 }
 
@@ -190,20 +198,6 @@ u64 CoreTiming::GetClockTicks() const {
     return CpuCyclesToClockCycles(ticks);
 }
 
-void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::scoped_lock lock{basic_lock};
-
-    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
-        return e.type.lock().get() == event_type.get();
-    });
-
-    // Removing random items breaks the invariant so we have to re-establish it.
-    if (itr != event_queue.end()) {
-        event_queue.erase(itr, event_queue.end());
-        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-    }
-}
-
 std::optional<s64> CoreTiming::Advance() {
     std::scoped_lock lock{advance_lock, basic_lock};
     global_timer = GetGlobalTimeNs().count();
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index b5925193c..da366637b 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -98,10 +98,13 @@ public:
                                    const std::shared_ptr<EventType>& event_type,
                                    std::uintptr_t user_data = 0, bool absolute_time = false);
 
-    void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data);
+    void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data,
+                         bool wait = true);
 
-    /// We only permit one event of each type in the queue at a time.
-    void RemoveEvent(const std::shared_ptr<EventType>& event_type);
+    void UnscheduleEventWithoutWait(const std::shared_ptr<EventType>& event_type,
+                                    std::uintptr_t user_data) {
+        UnscheduleEvent(event_type, user_data, false);
+    }
 
     void AddTicks(u64 ticks_to_add);
 
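The wait parameter works because Advance() dispatches event callbacks while holding advance_lock (see the core_timing.cpp hunk above), so briefly acquiring the same mutex cannot complete until any callback that was already running has returned. A minimal sketch of the idiom, with illustrative names:

    #include <mutex>

    class Dispatcher {
    public:
        void Advance() {
            std::scoped_lock lock{advance_lock};
            // ... pop due events and invoke their callbacks under the lock ...
        }

        void WaitForInFlightCallbacks() {
            // Blocks until Advance() releases the lock, i.e. until a callback
            // that was mid-execution has finished, then returns immediately.
            std::scoped_lock lock{advance_lock};
        }

    private:
        std::mutex advance_lock;
    };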
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index 71364c323..7a01f3f4c 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -1434,16 +1434,6 @@ AnalogSticks EmulatedController::GetSticks() const {
         return {};
     }
 
-    // Some drivers like stick from buttons need constant refreshing
-    for (auto& device : stick_devices) {
-        if (!device) {
-            continue;
-        }
-        lock.unlock();
-        device->SoftUpdate();
-        lock.lock();
-    }
-
     return controller.analog_stick_state;
 }
 
diff --git a/src/core/hle/kernel/k_code_memory.cpp b/src/core/hle/kernel/k_code_memory.cpp
index 4b1c134d4..d9da1e600 100644
--- a/src/core/hle/kernel/k_code_memory.cpp
+++ b/src/core/hle/kernel/k_code_memory.cpp
@@ -27,13 +27,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si
     auto& page_table = m_owner->PageTable();
 
     // Construct the page group.
-    m_page_group = {};
+    m_page_group.emplace(kernel, page_table.GetBlockInfoManager());
 
     // Lock the memory.
-    R_TRY(page_table.LockForCodeMemory(&m_page_group, addr, size))
+    R_TRY(page_table.LockForCodeMemory(std::addressof(*m_page_group), addr, size))
 
     // Clear the memory.
-    for (const auto& block : m_page_group.Nodes()) {
+    for (const auto& block : *m_page_group) {
         std::memset(device_memory.GetPointer<void>(block.GetAddress()), 0xFF, block.GetSize());
     }
 
@@ -51,12 +51,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si
 void KCodeMemory::Finalize() {
     // Unlock.
     if (!m_is_mapped && !m_is_owner_mapped) {
-        const size_t size = m_page_group.GetNumPages() * PageSize;
-        m_owner->PageTable().UnlockForCodeMemory(m_address, size, m_page_group);
+        const size_t size = m_page_group->GetNumPages() * PageSize;
+        m_owner->PageTable().UnlockForCodeMemory(m_address, size, *m_page_group);
     }
 
     // Close the page group.
-    m_page_group = {};
+    m_page_group->Close();
+    m_page_group->Finalize();
 
     // Close our reference to our owner.
     m_owner->Close();
@@ -64,7 +65,7 @@ void KCodeMemory::Finalize() {
 
 Result KCodeMemory::Map(VAddr address, size_t size) {
     // Validate the size.
-    R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
+    R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
 
     // Lock ourselves.
     KScopedLightLock lk(m_lock);
@@ -74,7 +75,7 @@ Result KCodeMemory::Map(VAddr address, size_t size) {
 
     // Map the memory.
     R_TRY(kernel.CurrentProcess()->PageTable().MapPages(
-        address, m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite));
+        address, *m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite));
 
     // Mark ourselves as mapped.
     m_is_mapped = true;
@@ -84,13 +85,13 @@ Result KCodeMemory::Map(VAddr address, size_t size) {
 
 Result KCodeMemory::Unmap(VAddr address, size_t size) {
     // Validate the size.
-    R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
+    R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
 
     // Lock ourselves.
     KScopedLightLock lk(m_lock);
 
     // Unmap the memory.
-    R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, m_page_group,
+    R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, *m_page_group,
                                                           KMemoryState::CodeOut));
 
     // Mark ourselves as unmapped.
@@ -101,7 +102,7 @@ Result KCodeMemory::Unmap(VAddr address, size_t size) {
 
 Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission perm) {
     // Validate the size.
-    R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
+    R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
 
     // Lock ourselves.
     KScopedLightLock lk(m_lock);
@@ -125,7 +126,7 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission
 
     // Map the memory.
     R_TRY(
-        m_owner->PageTable().MapPages(address, m_page_group, KMemoryState::GeneratedCode, k_perm));
+        m_owner->PageTable().MapPages(address, *m_page_group, KMemoryState::GeneratedCode, k_perm));
 
     // Mark ourselves as mapped.
     m_is_owner_mapped = true;
@@ -135,13 +136,13 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission
 
 Result KCodeMemory::UnmapFromOwner(VAddr address, size_t size) {
     // Validate the size.
-    R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
+    R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
 
     // Lock ourselves.
     KScopedLightLock lk(m_lock);
 
     // Unmap the memory.
-    R_TRY(m_owner->PageTable().UnmapPages(address, m_page_group, KMemoryState::GeneratedCode));
+    R_TRY(m_owner->PageTable().UnmapPages(address, *m_page_group, KMemoryState::GeneratedCode));
 
     // Mark ourselves as unmapped.
     m_is_owner_mapped = false;
diff --git a/src/core/hle/kernel/k_code_memory.h b/src/core/hle/kernel/k_code_memory.h
index 2e7e1436a..5b260b385 100644
--- a/src/core/hle/kernel/k_code_memory.h
+++ b/src/core/hle/kernel/k_code_memory.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <optional>
+
 #include "common/common_types.h"
 #include "core/device_memory.h"
 #include "core/hle/kernel/k_auto_object.h"
@@ -49,11 +51,11 @@ public:
         return m_address;
     }
     size_t GetSize() const {
-        return m_is_initialized ? m_page_group.GetNumPages() * PageSize : 0;
+        return m_is_initialized ? m_page_group->GetNumPages() * PageSize : 0;
     }
 
 private:
-    KPageGroup m_page_group{};
+    std::optional<KPageGroup> m_page_group{};
     KProcess* m_owner{};
     VAddr m_address{};
     KLightLock m_lock;
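KPageGroup now requires a KernelCore reference and a KBlockInfoManager pointer at construction, so KCodeMemory can no longer default-construct the member; std::optional defers construction until Initialize() has the dependencies in hand. A generic sketch of the pattern (types here are illustrative):

    #include <optional>

    struct BlockInfoManager {};

    struct PageGroup {
        explicit PageGroup(BlockInfoManager* manager) : m_manager{manager} {}
        BlockInfoManager* m_manager;
    };

    struct CodeMemory {
        std::optional<PageGroup> m_page_group; // empty until dependencies exist

        void Initialize(BlockInfoManager* manager) {
            m_page_group.emplace(manager); // construct in place, no copy or move needed
        }
    };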
diff --git a/src/core/hle/kernel/k_hardware_timer.cpp b/src/core/hle/kernel/k_hardware_timer.cpp
index 6bba79ea0..4dcd53821 100644
--- a/src/core/hle/kernel/k_hardware_timer.cpp
+++ b/src/core/hle/kernel/k_hardware_timer.cpp
@@ -18,7 +18,8 @@ void KHardwareTimer::Initialize() {
 }
 
 void KHardwareTimer::Finalize() {
-    this->DisableInterrupt();
+    m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this));
+    m_wakeup_time = std::numeric_limits<s64>::max();
     m_event_type.reset();
 }
 
@@ -59,7 +60,8 @@ void KHardwareTimer::EnableInterrupt(s64 wakeup_time) {
 }
 
 void KHardwareTimer::DisableInterrupt() {
-    m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this));
+    m_kernel.System().CoreTiming().UnscheduleEventWithoutWait(m_event_type,
+                                                              reinterpret_cast<uintptr_t>(this));
     m_wakeup_time = std::numeric_limits<s64>::max();
 }
 
diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp
index bd33571da..cd6ea388e 100644
--- a/src/core/hle/kernel/k_memory_manager.cpp
+++ b/src/core/hle/kernel/k_memory_manager.cpp
@@ -223,7 +223,7 @@ Result KMemoryManager::AllocatePageGroupImpl(KPageGroup* out, size_t num_pages,
 
     // Ensure that we don't leave anything un-freed.
     ON_RESULT_FAILURE {
-        for (const auto& it : out->Nodes()) {
+        for (const auto& it : *out) {
             auto& manager = this->GetManager(it.GetAddress());
             const size_t node_num_pages = std::min<u64>(
                 it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
@@ -285,7 +285,7 @@ Result KMemoryManager::AllocateAndOpen(KPageGroup* out, size_t num_pages, u32 op
         m_has_optimized_process[static_cast<size_t>(pool)], true));
 
     // Open the first reference to the pages.
-    for (const auto& block : out->Nodes()) {
+    for (const auto& block : *out) {
         PAddr cur_address = block.GetAddress();
         size_t remaining_pages = block.GetNumPages();
         while (remaining_pages > 0) {
@@ -335,7 +335,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32
     // Perform optimized memory tracking, if we should.
     if (optimized) {
         // Iterate over the allocated blocks.
-        for (const auto& block : out->Nodes()) {
+        for (const auto& block : *out) {
             // Get the block extents.
             const PAddr block_address = block.GetAddress();
             const size_t block_pages = block.GetNumPages();
@@ -391,7 +391,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32
         }
     } else {
         // Set all the allocated memory.
-        for (const auto& block : out->Nodes()) {
+        for (const auto& block : *out) {
             std::memset(m_system.DeviceMemory().GetPointer<void>(block.GetAddress()), fill_pattern,
                         block.GetSize());
         }
diff --git a/src/core/hle/kernel/k_page_group.cpp b/src/core/hle/kernel/k_page_group.cpp
new file mode 100644
index 000000000..d8c644a33
--- /dev/null
+++ b/src/core/hle/kernel/k_page_group.cpp
@@ -0,0 +1,121 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "core/hle/kernel/k_dynamic_resource_manager.h"
+#include "core/hle/kernel/k_memory_manager.h"
+#include "core/hle/kernel/k_page_group.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/svc_results.h"
+
+namespace Kernel {
+
+void KPageGroup::Finalize() {
+    KBlockInfo* cur = m_first_block;
+    while (cur != nullptr) {
+        KBlockInfo* next = cur->GetNext();
+        m_manager->Free(cur);
+        cur = next;
+    }
+
+    m_first_block = nullptr;
+    m_last_block = nullptr;
+}
+
+void KPageGroup::CloseAndReset() {
+    auto& mm = m_kernel.MemoryManager();
+
+    KBlockInfo* cur = m_first_block;
+    while (cur != nullptr) {
+        KBlockInfo* next = cur->GetNext();
+        mm.Close(cur->GetAddress(), cur->GetNumPages());
+        m_manager->Free(cur);
+        cur = next;
+    }
+
+    m_first_block = nullptr;
+    m_last_block = nullptr;
+}
+
+size_t KPageGroup::GetNumPages() const {
+    size_t num_pages = 0;
+
+    for (const auto& it : *this) {
+        num_pages += it.GetNumPages();
+    }
+
+    return num_pages;
+}
+
+Result KPageGroup::AddBlock(KPhysicalAddress addr, size_t num_pages) {
+    // Succeed immediately if we're adding no pages.
+    R_SUCCEED_IF(num_pages == 0);
+
+    // Check for overflow.
+    ASSERT(addr < addr + num_pages * PageSize);
+
+    // Try to just append to the last block.
+    if (m_last_block != nullptr) {
+        R_SUCCEED_IF(m_last_block->TryConcatenate(addr, num_pages));
+    }
+
+    // Allocate a new block.
+    KBlockInfo* new_block = m_manager->Allocate();
+    R_UNLESS(new_block != nullptr, ResultOutOfResource);
+
+    // Initialize the block.
+    new_block->Initialize(addr, num_pages);
+
+    // Add the block to our list.
+    if (m_last_block != nullptr) {
+        m_last_block->SetNext(new_block);
+    } else {
+        m_first_block = new_block;
+    }
+    m_last_block = new_block;
+
+    R_SUCCEED();
+}
+
+void KPageGroup::Open() const {
+    auto& mm = m_kernel.MemoryManager();
+
+    for (const auto& it : *this) {
+        mm.Open(it.GetAddress(), it.GetNumPages());
+    }
+}
+
+void KPageGroup::OpenFirst() const {
+    auto& mm = m_kernel.MemoryManager();
+
+    for (const auto& it : *this) {
+        mm.OpenFirst(it.GetAddress(), it.GetNumPages());
+    }
+}
+
+void KPageGroup::Close() const {
+    auto& mm = m_kernel.MemoryManager();
+
+    for (const auto& it : *this) {
+        mm.Close(it.GetAddress(), it.GetNumPages());
+    }
+}
+
+bool KPageGroup::IsEquivalentTo(const KPageGroup& rhs) const {
+    auto lit = this->begin();
+    auto rit = rhs.begin();
+    auto lend = this->end();
+    auto rend = rhs.end();
+
+    while (lit != lend && rit != rend) {
+        if (*lit != *rit) {
+            return false;
+        }
+
+        ++lit;
+        ++rit;
+    }
+
+    return lit == lend && rit == rend;
+}
+
+} // namespace Kernel
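Two properties of this implementation are worth noting: AddBlock() first tries to fold a physically contiguous range into the previous node via TryConcatenate(), and node storage comes from the KBlockInfoManager slab rather than a std::list. A hypothetical usage sketch against the API above (addresses assume the 4 KiB PageSize; shown as comments since it needs live kernel state to run):

    // KPageGroup pg{kernel, std::addressof(block_info_manager)};
    // ASSERT(pg.AddBlock(0x10000, 1).IsSuccess()); // first node: 1 page at 0x10000
    // ASSERT(pg.AddBlock(0x11000, 2).IsSuccess()); // contiguous: concatenated into that node
    // ASSERT(pg.AddBlock(0x20000, 1).IsSuccess()); // disjoint: a second node is allocated
    // ASSERT(pg.GetNumPages() == 4);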
diff --git a/src/core/hle/kernel/k_page_group.h b/src/core/hle/kernel/k_page_group.h
index 316f172f2..c07f17663 100644
--- a/src/core/hle/kernel/k_page_group.h
+++ b/src/core/hle/kernel/k_page_group.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
@@ -13,24 +13,23 @@
 
 namespace Kernel {
 
+class KBlockInfoManager;
+class KernelCore;
 class KPageGroup;
 
 class KBlockInfo {
-private:
-    friend class KPageGroup;
-
 public:
-    constexpr KBlockInfo() = default;
+    constexpr explicit KBlockInfo() : m_next(nullptr) {}
 
-    constexpr void Initialize(PAddr addr, size_t np) {
+    constexpr void Initialize(KPhysicalAddress addr, size_t np) {
         ASSERT(Common::IsAligned(addr, PageSize));
         ASSERT(static_cast<u32>(np) == np);
 
-        m_page_index = static_cast<u32>(addr) / PageSize;
+        m_page_index = static_cast<u32>(addr / PageSize);
         m_num_pages = static_cast<u32>(np);
     }
 
-    constexpr PAddr GetAddress() const {
+    constexpr KPhysicalAddress GetAddress() const {
         return m_page_index * PageSize;
     }
     constexpr size_t GetNumPages() const {
@@ -39,10 +38,10 @@ public:
     constexpr size_t GetSize() const {
         return this->GetNumPages() * PageSize;
     }
-    constexpr PAddr GetEndAddress() const {
+    constexpr KPhysicalAddress GetEndAddress() const {
         return (m_page_index + m_num_pages) * PageSize;
     }
-    constexpr PAddr GetLastAddress() const {
+    constexpr KPhysicalAddress GetLastAddress() const {
         return this->GetEndAddress() - 1;
     }
 
@@ -62,8 +61,8 @@ public:
         return !(*this == rhs);
     }
 
-    constexpr bool IsStrictlyBefore(PAddr addr) const {
-        const PAddr end = this->GetEndAddress();
+    constexpr bool IsStrictlyBefore(KPhysicalAddress addr) const {
+        const KPhysicalAddress end = this->GetEndAddress();
 
         if (m_page_index != 0 && end == 0) {
             return false;
@@ -72,11 +71,11 @@ public:
         return end < addr;
     }
 
-    constexpr bool operator<(PAddr addr) const {
+    constexpr bool operator<(KPhysicalAddress addr) const {
         return this->IsStrictlyBefore(addr);
     }
 
-    constexpr bool TryConcatenate(PAddr addr, size_t np) {
+    constexpr bool TryConcatenate(KPhysicalAddress addr, size_t np) {
         if (addr != 0 && addr == this->GetEndAddress()) {
             m_num_pages += static_cast<u32>(np);
             return true;
@@ -90,96 +89,118 @@ private:
     }
 
 private:
+    friend class KPageGroup;
+
     KBlockInfo* m_next{};
     u32 m_page_index{};
     u32 m_num_pages{};
 };
 static_assert(sizeof(KBlockInfo) <= 0x10);
 
-class KPageGroup final {
+class KPageGroup {
 public:
-    class Node final {
+    class Iterator {
     public:
-        constexpr Node(u64 addr_, std::size_t num_pages_) : addr{addr_}, num_pages{num_pages_} {}
+        using iterator_category = std::forward_iterator_tag;
+        using value_type = const KBlockInfo;
+        using difference_type = std::ptrdiff_t;
+        using pointer = value_type*;
+        using reference = value_type&;
+
+        constexpr explicit Iterator(pointer n) : m_node(n) {}
+
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_node == rhs.m_node;
+        }
+        constexpr bool operator!=(const Iterator& rhs) const {
+            return !(*this == rhs);
+        }
 
-        constexpr u64 GetAddress() const {
-            return addr;
+        constexpr pointer operator->() const {
+            return m_node;
+        }
+        constexpr reference operator*() const {
+            return *m_node;
         }
 
-        constexpr std::size_t GetNumPages() const {
-            return num_pages;
+        constexpr Iterator& operator++() {
+            m_node = m_node->GetNext();
+            return *this;
         }
 
-        constexpr std::size_t GetSize() const {
-            return GetNumPages() * PageSize;
+        constexpr Iterator operator++(int) {
+            const Iterator it{*this};
+            ++(*this);
+            return it;
        }
 
     private:
-        u64 addr{};
-        std::size_t num_pages{};
+        pointer m_node{};
     };
 
-public:
-    KPageGroup() = default;
-    KPageGroup(u64 address, u64 num_pages) {
-        ASSERT(AddBlock(address, num_pages).IsSuccess());
+    explicit KPageGroup(KernelCore& kernel, KBlockInfoManager* m)
+        : m_kernel{kernel}, m_manager{m} {}
+    ~KPageGroup() {
+        this->Finalize();
     }
 
-    constexpr std::list<Node>& Nodes() {
-        return nodes;
-    }
+    void CloseAndReset();
+    void Finalize();
 
-    constexpr const std::list<Node>& Nodes() const {
-        return nodes;
+    Iterator begin() const {
+        return Iterator{m_first_block};
+    }
+    Iterator end() const {
+        return Iterator{nullptr};
+    }
+    bool empty() const {
+        return m_first_block == nullptr;
     }
 
-    std::size_t GetNumPages() const {
-        std::size_t num_pages = 0;
-        for (const Node& node : nodes) {
-            num_pages += node.GetNumPages();
-        }
-        return num_pages;
-    }
-
-    bool IsEqual(KPageGroup& other) const {
-        auto this_node = nodes.begin();
-        auto other_node = other.nodes.begin();
-        while (this_node != nodes.end() && other_node != other.nodes.end()) {
-            if (this_node->GetAddress() != other_node->GetAddress() ||
-                this_node->GetNumPages() != other_node->GetNumPages()) {
-                return false;
-            }
-            this_node = std::next(this_node);
-            other_node = std::next(other_node);
-        }
+    Result AddBlock(KPhysicalAddress addr, size_t num_pages);
+    void Open() const;
+    void OpenFirst() const;
+    void Close() const;
+
+    size_t GetNumPages() const;
+
+    bool IsEquivalentTo(const KPageGroup& rhs) const;
+
+    bool operator==(const KPageGroup& rhs) const {
+        return this->IsEquivalentTo(rhs);
+    }
 
-        return this_node == nodes.end() && other_node == other.nodes.end();
+    bool operator!=(const KPageGroup& rhs) const {
+        return !(*this == rhs);
     }
 
-    Result AddBlock(u64 address, u64 num_pages) {
-        if (!num_pages) {
-            return ResultSuccess;
-        }
-        if (!nodes.empty()) {
-            const auto node = nodes.back();
-            if (node.GetAddress() + node.GetNumPages() * PageSize == address) {
-                address = node.GetAddress();
-                num_pages += node.GetNumPages();
-                nodes.pop_back();
-            }
-        }
-        nodes.push_back({address, num_pages});
-        return ResultSuccess;
-    }
+private:
+    KernelCore& m_kernel;
+    KBlockInfo* m_first_block{};
+    KBlockInfo* m_last_block{};
+    KBlockInfoManager* m_manager{};
+};
+
+class KScopedPageGroup {
+public:
+    explicit KScopedPageGroup(const KPageGroup* gp) : m_pg(gp) {
+        if (m_pg) {
+            m_pg->Open();
+        }
+    }
+    explicit KScopedPageGroup(const KPageGroup& gp) : KScopedPageGroup(std::addressof(gp)) {}
+    ~KScopedPageGroup() {
+        if (m_pg) {
+            m_pg->Close();
+        }
    }
 
-    bool Empty() const {
-        return nodes.empty();
+    void CancelClose() {
+        m_pg = nullptr;
    }
 
-    void Finalize() {}
-
 private:
-    std::list<Node> nodes;
+    const KPageGroup* m_pg{};
 };
 
 } // namespace Kernel
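KScopedPageGroup is a small RAII guard over the reference counts: it calls Open() on every block at construction, Close() at destruction, and CancelClose() lets a success path keep the references open. A sketch of the intended call pattern (DoMap is an illustrative placeholder, shown as comments since it needs a live page table):

    // Result MapAndKeep(const KPageGroup& pg) {
    //     KScopedPageGroup spg(pg); // Open() now; Close() on any early-out path
    //     R_TRY(DoMap(pg));         // on failure the guard closes the references
    //     spg.CancelClose();        // on success, keep them open
    //     R_SUCCEED();
    // }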
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 612fc76fa..9c7ac22dc 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -100,7 +100,7 @@ constexpr size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType a
 
 KPageTable::KPageTable(Core::System& system_)
     : m_general_lock{system_.Kernel()},
-      m_map_physical_memory_lock{system_.Kernel()}, m_system{system_} {}
+      m_map_physical_memory_lock{system_.Kernel()}, m_system{system_}, m_kernel{system_.Kernel()} {}
 
 KPageTable::~KPageTable() = default;
 
@@ -373,7 +373,7 @@ Result KPageTable::MapProcessCode(VAddr addr, size_t num_pages, KMemoryState sta
                                              m_memory_block_slab_manager);
 
     // Allocate and open.
-    KPageGroup pg;
+    KPageGroup pg{m_kernel, m_block_info_manager};
     R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
         &pg, num_pages,
         KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, m_allocation_option)));
@@ -432,7 +432,7 @@ Result KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, size_t si
     const size_t num_pages = size / PageSize;
 
     // Create page groups for the memory being mapped.
-    KPageGroup pg;
+    KPageGroup pg{m_kernel, m_block_info_manager};
     AddRegionToPages(src_address, num_pages, pg);
 
     // Reprotect the source as kernel-read/not mapped.
@@ -593,7 +593,7 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) {
     const size_t size = num_pages * PageSize;
 
     // We're making a new group, not adding to an existing one.
-    R_UNLESS(pg.Empty(), ResultInvalidCurrentMemory);
+    R_UNLESS(pg.empty(), ResultInvalidCurrentMemory);
 
     // Begin traversal.
     Common::PageTable::TraversalContext context;
@@ -640,11 +640,10 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) {
     R_SUCCEED();
 }
 
-bool KPageTable::IsValidPageGroup(const KPageGroup& pg_ll, VAddr addr, size_t num_pages) {
+bool KPageTable::IsValidPageGroup(const KPageGroup& pg, VAddr addr, size_t num_pages) {
     ASSERT(this->IsLockedByCurrentThread());
 
     const size_t size = num_pages * PageSize;
-    const auto& pg = pg_ll.Nodes();
     const auto& memory_layout = m_system.Kernel().MemoryLayout();
 
     // Empty groups are necessarily invalid.
@@ -942,9 +941,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
 
     ON_RESULT_FAILURE {
         if (cur_mapped_addr != dst_addr) {
-            // HACK: Manually close the pages.
-            HACK_ClosePages(dst_addr, (cur_mapped_addr - dst_addr) / PageSize);
-
             ASSERT(Operate(dst_addr, (cur_mapped_addr - dst_addr) / PageSize,
                            KMemoryPermission::None, OperationType::Unmap)
                        .IsSuccess());
@@ -1020,9 +1016,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
         // Map the page.
         R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, start_partial_page));
 
-        // HACK: Manually open the pages.
-        HACK_OpenPages(start_partial_page, 1);
-
         // Update tracking extents.
         cur_mapped_addr += PageSize;
         cur_block_addr += PageSize;
@@ -1051,9 +1044,6 @@
         R_TRY(Operate(cur_mapped_addr, cur_block_size / PageSize, test_perm, OperationType::Map,
                       cur_block_addr));
 
-        // HACK: Manually open the pages.
-        HACK_OpenPages(cur_block_addr, cur_block_size / PageSize);
-
         // Update tracking extents.
         cur_mapped_addr += cur_block_size;
         cur_block_addr = next_entry.phys_addr;
@@ -1073,9 +1063,6 @@
         R_TRY(Operate(cur_mapped_addr, last_block_size / PageSize, test_perm, OperationType::Map,
                       cur_block_addr));
 
-        // HACK: Manually open the pages.
-        HACK_OpenPages(cur_block_addr, last_block_size / PageSize);
-
         // Update tracking extents.
         cur_mapped_addr += last_block_size;
         cur_block_addr += last_block_size;
@@ -1107,9 +1094,6 @@
 
         // Map the page.
         R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, end_partial_page));
-
-        // HACK: Manually open the pages.
-        HACK_OpenPages(end_partial_page, 1);
     }
 
     // Update memory blocks to reflect our changes
@@ -1211,9 +1195,6 @@ Result KPageTable::CleanupForIpcServer(VAddr address, size_t size, KMemoryState
     const size_t aligned_size = aligned_end - aligned_start;
     const size_t aligned_num_pages = aligned_size / PageSize;
 
-    // HACK: Manually close the pages.
-    HACK_ClosePages(aligned_start, aligned_num_pages);
-
     // Unmap the pages.
     R_TRY(Operate(aligned_start, aligned_num_pages, KMemoryPermission::None, OperationType::Unmap));
 
@@ -1501,17 +1482,6 @@ void KPageTable::CleanupForIpcClientOnServerSetupFailure([[maybe_unused]] PageLi
     }
 }
 
-void KPageTable::HACK_OpenPages(PAddr phys_addr, size_t num_pages) {
-    m_system.Kernel().MemoryManager().OpenFirst(phys_addr, num_pages);
-}
-
-void KPageTable::HACK_ClosePages(VAddr virt_addr, size_t num_pages) {
-    for (size_t index = 0; index < num_pages; ++index) {
-        const auto paddr = GetPhysicalAddr(virt_addr + (index * PageSize));
-        m_system.Kernel().MemoryManager().Close(paddr, 1);
-    }
-}
-
 Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
     // Lock the physical memory lock.
     KScopedLightLock phys_lk(m_map_physical_memory_lock);
@@ -1572,7 +1542,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
             R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
 
             // Allocate pages for the new memory.
-            KPageGroup pg;
+            KPageGroup pg{m_kernel, m_block_info_manager};
             R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess(
                 &pg, (size - mapped_size) / PageSize, m_allocate_option, 0, 0));
 
@@ -1650,7 +1620,7 @@
             KScopedPageTableUpdater updater(this);
 
             // Prepare to iterate over the memory.
-            auto pg_it = pg.Nodes().begin();
+            auto pg_it = pg.begin();
             PAddr pg_phys_addr = pg_it->GetAddress();
             size_t pg_pages = pg_it->GetNumPages();
 
@@ -1680,9 +1650,6 @@
                                            last_unmap_address + 1 - cur_address) /
                                       PageSize;
 
-                        // HACK: Manually close the pages.
-                        HACK_ClosePages(cur_address, cur_pages);
-
                         // Unmap.
                         ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None,
                                        OperationType::Unmap)
@@ -1703,7 +1670,7 @@
                 // Release any remaining unmapped memory.
                 m_system.Kernel().MemoryManager().OpenFirst(pg_phys_addr, pg_pages);
                 m_system.Kernel().MemoryManager().Close(pg_phys_addr, pg_pages);
-                for (++pg_it; pg_it != pg.Nodes().end(); ++pg_it) {
+                for (++pg_it; pg_it != pg.end(); ++pg_it) {
                     m_system.Kernel().MemoryManager().OpenFirst(pg_it->GetAddress(),
                                                                 pg_it->GetNumPages());
                     m_system.Kernel().MemoryManager().Close(pg_it->GetAddress(),
@@ -1731,7 +1698,7 @@
                 // Check if we're at the end of the physical block.
                 if (pg_pages == 0) {
                     // Ensure there are more pages to map.
-                    ASSERT(pg_it != pg.Nodes().end());
+                    ASSERT(pg_it != pg.end());
 
                     // Advance our physical block.
                     ++pg_it;
@@ -1742,10 +1709,7 @@
                 // Map whatever we can.
                 const size_t cur_pages = std::min(pg_pages, map_pages);
                 R_TRY(Operate(cur_address, cur_pages, KMemoryPermission::UserReadWrite,
-                              OperationType::Map, pg_phys_addr));
-
-                // HACK: Manually open the pages.
-                HACK_OpenPages(pg_phys_addr, cur_pages);
+                              OperationType::MapFirst, pg_phys_addr));
 
                 // Advance.
                 cur_address += cur_pages * PageSize;
@@ -1888,9 +1852,6 @@ Result KPageTable::UnmapPhysicalMemory(VAddr address, size_t size) {
                                        last_address + 1 - cur_address) /
                                   PageSize;
 
-            // HACK: Manually close the pages.
-            HACK_ClosePages(cur_address, cur_pages);
-
             // Unmap.
1895 ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap) 1856 ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap)
1896 .IsSuccess()); 1857 .IsSuccess());
@@ -1955,7 +1916,7 @@ Result KPageTable::MapMemory(VAddr dst_address, VAddr src_address, size_t size)
1955 R_TRY(dst_allocator_result); 1916 R_TRY(dst_allocator_result);
1956 1917
1957 // Map the memory. 1918 // Map the memory.
1958 KPageGroup page_linked_list; 1919 KPageGroup page_linked_list{m_kernel, m_block_info_manager};
1959 const size_t num_pages{size / PageSize}; 1920 const size_t num_pages{size / PageSize};
1960 const KMemoryPermission new_src_perm = static_cast<KMemoryPermission>( 1921 const KMemoryPermission new_src_perm = static_cast<KMemoryPermission>(
1961 KMemoryPermission::KernelRead | KMemoryPermission::NotMapped); 1922 KMemoryPermission::KernelRead | KMemoryPermission::NotMapped);
@@ -2022,14 +1983,14 @@ Result KPageTable::UnmapMemory(VAddr dst_address, VAddr src_address, size_t size
2022 num_dst_allocator_blocks); 1983 num_dst_allocator_blocks);
2023 R_TRY(dst_allocator_result); 1984 R_TRY(dst_allocator_result);
2024 1985
2025 KPageGroup src_pages; 1986 KPageGroup src_pages{m_kernel, m_block_info_manager};
2026 KPageGroup dst_pages; 1987 KPageGroup dst_pages{m_kernel, m_block_info_manager};
2027 const size_t num_pages{size / PageSize}; 1988 const size_t num_pages{size / PageSize};
2028 1989
2029 AddRegionToPages(src_address, num_pages, src_pages); 1990 AddRegionToPages(src_address, num_pages, src_pages);
2030 AddRegionToPages(dst_address, num_pages, dst_pages); 1991 AddRegionToPages(dst_address, num_pages, dst_pages);
2031 1992
2032 R_UNLESS(dst_pages.IsEqual(src_pages), ResultInvalidMemoryRegion); 1993 R_UNLESS(dst_pages.IsEquivalentTo(src_pages), ResultInvalidMemoryRegion);
2033 1994
2034 { 1995 {
2035 auto block_guard = detail::ScopeExit([&] { MapPages(dst_address, dst_pages, dst_perm); }); 1996 auto block_guard = detail::ScopeExit([&] { MapPages(dst_address, dst_pages, dst_perm); });
@@ -2060,7 +2021,7 @@ Result KPageTable::MapPages(VAddr addr, const KPageGroup& page_linked_list,
2060 2021
2061 VAddr cur_addr{addr}; 2022 VAddr cur_addr{addr};
2062 2023
2063 for (const auto& node : page_linked_list.Nodes()) { 2024 for (const auto& node : page_linked_list) {
2064 if (const auto result{ 2025 if (const auto result{
2065 Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())}; 2026 Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())};
2066 result.IsError()) { 2027 result.IsError()) {
@@ -2160,7 +2121,7 @@ Result KPageTable::UnmapPages(VAddr addr, const KPageGroup& page_linked_list) {
2160 2121
2161 VAddr cur_addr{addr}; 2122 VAddr cur_addr{addr};
2162 2123
2163 for (const auto& node : page_linked_list.Nodes()) { 2124 for (const auto& node : page_linked_list) {
2164 if (const auto result{Operate(cur_addr, node.GetNumPages(), KMemoryPermission::None, 2125 if (const auto result{Operate(cur_addr, node.GetNumPages(), KMemoryPermission::None,
2165 OperationType::Unmap)}; 2126 OperationType::Unmap)};
2166 result.IsError()) { 2127 result.IsError()) {
@@ -2527,13 +2488,13 @@ Result KPageTable::SetHeapSize(VAddr* out, size_t size) {
2527 R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); 2488 R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
2528 2489
2529 // Allocate pages for the heap extension. 2490 // Allocate pages for the heap extension.
2530 KPageGroup pg; 2491 KPageGroup pg{m_kernel, m_block_info_manager};
2531 R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen( 2492 R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
2532 &pg, allocation_size / PageSize, 2493 &pg, allocation_size / PageSize,
2533 KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option))); 2494 KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option)));
2534 2495
2535 // Clear all the newly allocated pages. 2496 // Clear all the newly allocated pages.
2536 for (const auto& it : pg.Nodes()) { 2497 for (const auto& it : pg) {
2537 std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()), m_heap_fill_value, 2498 std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()), m_heap_fill_value,
2538 it.GetSize()); 2499 it.GetSize());
2539 } 2500 }
@@ -2610,11 +2571,23 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(size_t needed_num_pages, size_
2610 if (is_map_only) { 2571 if (is_map_only) {
2611 R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); 2572 R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
2612 } else { 2573 } else {
2613 KPageGroup page_group; 2574 // Create a page group to hold the pages we allocate.
2614 R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess( 2575 KPageGroup pg{m_kernel, m_block_info_manager};
2615 &page_group, needed_num_pages, 2576
2616 KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option), 0, 0)); 2577 R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
2617 R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); 2578 &pg, needed_num_pages,
2579 KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option)));
2580
2581 // Ensure that the page group is closed when we're done working with it.
2582 SCOPE_EXIT({ pg.Close(); });
2583
2584 // Clear all pages.
2585 for (const auto& it : pg) {
2586 std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()),
2587 m_heap_fill_value, it.GetSize());
2588 }
2589
2590 R_TRY(Operate(addr, needed_num_pages, pg, OperationType::MapGroup));
2618 } 2591 }
2619 2592
2620 // Update the blocks. 2593 // Update the blocks.
@@ -2795,19 +2768,28 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, const KPageGroup& page_
2795 ASSERT(num_pages > 0); 2768 ASSERT(num_pages > 0);
2796 ASSERT(num_pages == page_group.GetNumPages()); 2769 ASSERT(num_pages == page_group.GetNumPages());
2797 2770
2798 for (const auto& node : page_group.Nodes()) { 2771 switch (operation) {
2799 const size_t size{node.GetNumPages() * PageSize}; 2772 case OperationType::MapGroup: {
2773 // We want to maintain a new reference to every page in the group.
2774 KScopedPageGroup spg(page_group);
2775
2776 for (const auto& node : page_group) {
2777 const size_t size{node.GetNumPages() * PageSize};
2800 2778
2801 switch (operation) { 2779 // Map the pages.
2802 case OperationType::MapGroup:
2803 m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, size, node.GetAddress()); 2780 m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, size, node.GetAddress());
2804 break; 2781
2805 default: 2782 addr += size;
2806 ASSERT(false);
2807 break;
2808 } 2783 }
2809 2784
2810 addr += size; 2785 // We succeeded! We want to persist the reference to the pages.
2786 spg.CancelClose();
2787
2788 break;
2789 }
2790 default:
2791 ASSERT(false);
2792 break;
2811 } 2793 }
2812 2794
2813 R_SUCCEED(); 2795 R_SUCCEED();
@@ -2822,13 +2804,29 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, KMemoryPermission perm,
2822 ASSERT(ContainsPages(addr, num_pages)); 2804 ASSERT(ContainsPages(addr, num_pages));
2823 2805
2824 switch (operation) { 2806 switch (operation) {
2825 case OperationType::Unmap: 2807 case OperationType::Unmap: {
2808 // Ensure that any pages we track close on exit.
2809 KPageGroup pages_to_close{m_kernel, this->GetBlockInfoManager()};
2810 SCOPE_EXIT({ pages_to_close.CloseAndReset(); });
2811
2812 this->AddRegionToPages(addr, num_pages, pages_to_close);
2826 m_system.Memory().UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize); 2813 m_system.Memory().UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize);
2827 break; 2814 break;
2815 }
2816 case OperationType::MapFirst:
2828 case OperationType::Map: { 2817 case OperationType::Map: {
2829 ASSERT(map_addr); 2818 ASSERT(map_addr);
2830 ASSERT(Common::IsAligned(map_addr, PageSize)); 2819 ASSERT(Common::IsAligned(map_addr, PageSize));
2831 m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, num_pages * PageSize, map_addr); 2820 m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, num_pages * PageSize, map_addr);
2821
2822 // Open references to pages, if we should.
2823 if (IsHeapPhysicalAddress(m_kernel.MemoryLayout(), map_addr)) {
2824 if (operation == OperationType::MapFirst) {
2825 m_kernel.MemoryManager().OpenFirst(map_addr, num_pages);
2826 } else {
2827 m_kernel.MemoryManager().Open(map_addr, num_pages);
2828 }
2829 }
2832 break; 2830 break;
2833 } 2831 }
2834 case OperationType::Separate: { 2832 case OperationType::Separate: {
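The MapGroup path above opens a reference to every page before mapping and only keeps those references if the whole operation succeeds. A minimal sketch of that RAII pattern, assuming KPageGroup::Open()/Close() adjust per-page reference counts as the diff implies (the real guard is named KScopedPageGroup; the member names here are illustrative):

    // Sketch only: models the guard used in Operate() for OperationType::MapGroup.
    class ScopedPageGroup {
    public:
        explicit ScopedPageGroup(KPageGroup& pg) : m_pg(&pg) {
            m_pg->Open(); // take a reference to every block in the group
        }
        ~ScopedPageGroup() {
            if (m_pg != nullptr) {
                m_pg->Close(); // failure path: drop the references again
            }
        }
        void CancelClose() {
            m_pg = nullptr; // success path: keep the references alive
        }
    private:
        KPageGroup* m_pg;
    };

Destruction without CancelClose() undoes the Open(), which is what lets the early R_TRY returns in Operate() stay simple.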
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index f1ca785d7..0a454b05b 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -107,6 +107,10 @@ public:
107 return *m_page_table_impl; 107 return *m_page_table_impl;
108 } 108 }
109 109
110 KBlockInfoManager* GetBlockInfoManager() {
111 return m_block_info_manager;
112 }
113
110 bool CanContain(VAddr addr, size_t size, KMemoryState state) const; 114 bool CanContain(VAddr addr, size_t size, KMemoryState state) const;
111 115
112protected: 116protected:
@@ -261,10 +265,6 @@ private:
261 void CleanupForIpcClientOnServerSetupFailure(PageLinkedList* page_list, VAddr address, 265 void CleanupForIpcClientOnServerSetupFailure(PageLinkedList* page_list, VAddr address,
262 size_t size, KMemoryPermission prot_perm); 266 size_t size, KMemoryPermission prot_perm);
263 267
264 // HACK: These will be removed once we automatically manage page reference counts.
265 void HACK_OpenPages(PAddr phys_addr, size_t num_pages);
266 void HACK_ClosePages(VAddr virt_addr, size_t num_pages);
267
268 mutable KLightLock m_general_lock; 268 mutable KLightLock m_general_lock;
269 mutable KLightLock m_map_physical_memory_lock; 269 mutable KLightLock m_map_physical_memory_lock;
270 270
@@ -488,6 +488,7 @@ private:
488 std::unique_ptr<Common::PageTable> m_page_table_impl; 488 std::unique_ptr<Common::PageTable> m_page_table_impl;
489 489
490 Core::System& m_system; 490 Core::System& m_system;
491 KernelCore& m_kernel;
491}; 492};
492 493
493} // namespace Kernel 494} // namespace Kernel
diff --git a/src/core/hle/kernel/k_shared_memory.cpp b/src/core/hle/kernel/k_shared_memory.cpp
index 0aa68103c..3cf2b5d91 100644
--- a/src/core/hle/kernel/k_shared_memory.cpp
+++ b/src/core/hle/kernel/k_shared_memory.cpp
@@ -13,10 +13,7 @@
13namespace Kernel { 13namespace Kernel {
14 14
15KSharedMemory::KSharedMemory(KernelCore& kernel_) : KAutoObjectWithSlabHeapAndContainer{kernel_} {} 15KSharedMemory::KSharedMemory(KernelCore& kernel_) : KAutoObjectWithSlabHeapAndContainer{kernel_} {}
16 16KSharedMemory::~KSharedMemory() = default;
17KSharedMemory::~KSharedMemory() {
18 kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemoryMax, size);
19}
20 17
21Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* owner_process_, 18Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* owner_process_,
22 Svc::MemoryPermission owner_permission_, 19 Svc::MemoryPermission owner_permission_,
@@ -49,7 +46,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
49 R_UNLESS(physical_address != 0, ResultOutOfMemory); 46 R_UNLESS(physical_address != 0, ResultOutOfMemory);
50 47
51 //! Insert the result into our page group. 48 //! Insert the result into our page group.
52 page_group.emplace(physical_address, num_pages); 49 page_group.emplace(kernel, &kernel.GetSystemSystemResource().GetBlockInfoManager());
50 page_group->AddBlock(physical_address, num_pages);
53 51
54 // Commit our reservation. 52 // Commit our reservation.
55 memory_reservation.Commit(); 53 memory_reservation.Commit();
@@ -62,7 +60,7 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
62 is_initialized = true; 60 is_initialized = true;
63 61
64 // Clear all pages in the memory. 62 // Clear all pages in the memory.
65 for (const auto& block : page_group->Nodes()) { 63 for (const auto& block : *page_group) {
66 std::memset(device_memory_.GetPointer<void>(block.GetAddress()), 0, block.GetSize()); 64 std::memset(device_memory_.GetPointer<void>(block.GetAddress()), 0, block.GetSize());
67 } 65 }
68 66
@@ -71,13 +69,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
71 69
72void KSharedMemory::Finalize() { 70void KSharedMemory::Finalize() {
73 // Close and finalize the page group. 71 // Close and finalize the page group.
74 // page_group->Close(); 72 page_group->Close();
75 // page_group->Finalize(); 73 page_group->Finalize();
76
77 //! HACK: Manually close.
78 for (const auto& block : page_group->Nodes()) {
79 kernel.MemoryManager().Close(block.GetAddress(), block.GetNumPages());
80 }
81 74
82 // Release the memory reservation. 75 // Release the memory reservation.
83 resource_limit->Release(LimitableResource::PhysicalMemoryMax, size); 76 resource_limit->Release(LimitableResource::PhysicalMemoryMax, size);
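With the page group owning its blocks, shared memory no longer walks the blocks to close them by hand. A condensed sketch of the new lifecycle, assuming the allocation above opens the initial reference and that Close()/Finalize() behave as their names suggest:

    // Initialize(): record the allocated range in the group.
    page_group.emplace(kernel, &kernel.GetSystemSystemResource().GetBlockInfoManager());
    page_group->AddBlock(physical_address, num_pages);

    // Finalize(): dropping the group's reference releases the pages once no
    // page table still maps them; no manual MemoryManager().Close() loop.
    page_group->Close();
    page_group->Finalize();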
diff --git a/src/core/hle/kernel/memory_types.h b/src/core/hle/kernel/memory_types.h
index 3975507bd..92b8b37ac 100644
--- a/src/core/hle/kernel/memory_types.h
+++ b/src/core/hle/kernel/memory_types.h
@@ -14,4 +14,7 @@ constexpr std::size_t PageSize{1 << PageBits};
14 14
15using Page = std::array<u8, PageSize>; 15using Page = std::array<u8, PageSize>;
16 16
17using KPhysicalAddress = PAddr;
18using KProcessAddress = VAddr;
19
17} // namespace Kernel 20} // namespace Kernel
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 788ee2160..aca442196 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1485,7 +1485,7 @@ static Result MapProcessMemory(Core::System& system, VAddr dst_address, Handle p
1485 ResultInvalidMemoryRegion); 1485 ResultInvalidMemoryRegion);
1486 1486
1487 // Create a new page group. 1487 // Create a new page group.
1488 KPageGroup pg; 1488 KPageGroup pg{system.Kernel(), dst_pt.GetBlockInfoManager()};
1489 R_TRY(src_pt.MakeAndOpenPageGroup( 1489 R_TRY(src_pt.MakeAndOpenPageGroup(
1490 std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess, 1490 std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess,
1491 KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None, 1491 KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None,
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index d1cbadde4..f4416f5b2 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -312,8 +312,6 @@ void NVFlinger::Compose() {
312} 312}
313 313
314s64 NVFlinger::GetNextTicks() const { 314s64 NVFlinger::GetNextTicks() const {
315 static constexpr s64 max_hertz = 120LL;
316
317 const auto& settings = Settings::values; 315 const auto& settings = Settings::values;
318 auto speed_scale = 1.f; 316 auto speed_scale = 1.f;
319 if (settings.use_multi_core.GetValue()) { 317 if (settings.use_multi_core.GetValue()) {
@@ -327,9 +325,11 @@ s64 NVFlinger::GetNextTicks() const {
327 } 325 }
328 } 326 }
329 327
330 const auto next_ticks = ((1000000000 * (1LL << swap_interval)) / max_hertz); 328 // As an extension, treat nonpositive swap interval as framerate multiplier.
329 const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval)
330 : 60.f / static_cast<f32>(swap_interval);
331 331
332 return static_cast<s64>(speed_scale * static_cast<float>(next_ticks)); 332 return static_cast<s64>(speed_scale * (1000000000.f / effective_fps));
333} 333}
334 334
335} // namespace Service::NVFlinger 335} // namespace Service::NVFlinger
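The rewritten GetNextTicks() derives a target framerate from the swap interval instead of scaling a fixed 120 Hz period: a positive interval divides 60 FPS, while zero or a negative interval multiplies 120 FPS. A standalone sketch of the same arithmetic:

    #include <cstdint>

    // Mirrors the effective-FPS computation above.
    // swap_interval: 2 -> 30 FPS, 1 -> 60 FPS, 0 -> 120 FPS, -1 -> 240 FPS.
    int64_t NextTicksNs(int32_t swap_interval, float speed_scale = 1.0f) {
        const float effective_fps = swap_interval <= 0
                                        ? 120.0f * static_cast<float>(1 - swap_interval)
                                        : 60.0f / static_cast<float>(swap_interval);
        return static_cast<int64_t>(speed_scale * (1000000000.0f / effective_fps));
    }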
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 9b22397db..3828cf272 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -133,7 +133,7 @@ private:
133 /// layers. 133 /// layers.
134 u32 next_buffer_queue_id = 1; 134 u32 next_buffer_queue_id = 1;
135 135
136 u32 swap_interval = 1; 136 s32 swap_interval = 1;
137 137
138 /// Event that handles screen composition. 138 /// Event that handles screen composition.
139 std::shared_ptr<Core::Timing::EventType> multi_composition_event; 139 std::shared_ptr<Core::Timing::EventType> multi_composition_event;
diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp
index 447fbffaa..282ea1ff9 100644
--- a/src/core/internal_network/network.cpp
+++ b/src/core/internal_network/network.cpp
@@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
117 return Errno::NETUNREACH; 117 return Errno::NETUNREACH;
118 case WSAEMSGSIZE: 118 case WSAEMSGSIZE:
119 return Errno::MSGSIZE; 119 return Errno::MSGSIZE;
120 case WSAETIMEDOUT:
121 return Errno::TIMEDOUT;
120 default: 122 default:
121 UNIMPLEMENTED_MSG("Unimplemented errno={}", e); 123 UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
122 return Errno::OTHER; 124 return Errno::OTHER;
@@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
211 return Errno::NETUNREACH; 213 return Errno::NETUNREACH;
212 case EMSGSIZE: 214 case EMSGSIZE:
213 return Errno::MSGSIZE; 215 return Errno::MSGSIZE;
216 case ETIMEDOUT:
217 return Errno::TIMEDOUT;
214 default: 218 default:
215 UNIMPLEMENTED_MSG("Unimplemented errno={}", e); 219 UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
216 return Errno::OTHER; 220 return Errno::OTHER;
@@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
226 int e = errno; 230 int e = errno;
227#endif 231#endif
228 const Errno err = TranslateNativeError(e); 232 const Errno err = TranslateNativeError(e);
229 if (err == Errno::AGAIN) { 233 if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
230 return err; 234 return err;
231 } 235 }
232 LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); 236 LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 26be74df4..4e605fae4 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -383,6 +383,10 @@ struct Memory::Impl {
383 return; 383 return;
384 } 384 }
385 385
386 if (Settings::IsFastmemEnabled()) {
387 system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug);
388 }
389
386 // Iterate over a contiguous CPU address space, marking/unmarking the region. 390 // Iterate over a contiguous CPU address space, marking/unmarking the region.
387 // The region is at a granularity of CPU pages. 391 // The region is at a granularity of CPU pages.
388 392
@@ -436,7 +440,7 @@ struct Memory::Impl {
436 } 440 }
437 441
438 if (Settings::IsFastmemEnabled()) { 442 if (Settings::IsFastmemEnabled()) {
439 const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; 443 const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
440 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); 444 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
441 } 445 }
442 446
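Both hunks feed the same fastmem call, Protect(vaddr, size, allow_read, allow_write). A small sketch of the flag selection as changed here (a rough model, not the emulator's API; the cached-read rule now keys off the Extreme GPU accuracy level):

    // Sketch: pick fastmem protection flags for a region.
    struct Protection {
        bool read;
        bool write;
    };

    // Debugger watchpoints: trap both accesses while a region is debug-marked.
    Protection ForDebug(bool debug) {
        return {.read = !debug, .write = !debug};
    }

    // GPU-cached regions: writes always trap; reads only trap at Extreme
    // accuracy, where read tracking must be exact.
    Protection ForCached(bool cached, bool gpu_level_extreme) {
        return {.read = !gpu_level_extreme || !cached, .write = !cached};
    }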
diff --git a/src/input_common/drivers/tas_input.cpp b/src/input_common/drivers/tas_input.cpp
index f3ade90da..f3cb14c56 100644
--- a/src/input_common/drivers/tas_input.cpp
+++ b/src/input_common/drivers/tas_input.cpp
@@ -156,10 +156,12 @@ void Tas::RecordInput(u64 buttons, TasAnalog left_axis, TasAnalog right_axis) {
156 }; 156 };
157} 157}
158 158
159std::tuple<TasState, size_t, size_t> Tas::GetStatus() const { 159std::tuple<TasState, size_t, std::array<size_t, PLAYER_NUMBER>> Tas::GetStatus() const {
160 TasState state; 160 TasState state;
161 std::array<size_t, PLAYER_NUMBER> lengths{0};
161 if (is_recording) { 162 if (is_recording) {
162 return {TasState::Recording, 0, record_commands.size()}; 163 lengths[0] = record_commands.size();
164 return {TasState::Recording, record_commands.size(), lengths};
163 } 165 }
164 166
165 if (is_running) { 167 if (is_running) {
@@ -168,7 +170,11 @@ std::tuple<TasState, size_t, size_t> Tas::GetStatus() const {
168 state = TasState::Stopped; 170 state = TasState::Stopped;
169 } 171 }
170 172
171 return {state, current_command, script_length}; 173 for (size_t i = 0; i < PLAYER_NUMBER; i++) {
174 lengths[i] = commands[i].size();
175 }
176
177 return {state, current_command, lengths};
172} 178}
173 179
174void Tas::UpdateThread() { 180void Tas::UpdateThread() {
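Callers that previously received a single script length now get one length per player. A hedged usage sketch (tuple layout per the header change below; LOG_INFO stands in for whatever reporting the caller actually does):

    // Sketch: consuming the per-player lengths from Tas::GetStatus().
    const auto [state, current_frame, lengths] = tas->GetStatus();
    for (size_t player = 0; player < lengths.size(); ++player) {
        if (lengths[player] != 0) {
            LOG_INFO(Input, "Player {}: frame {}/{}", player, current_frame,
                     lengths[player]);
        }
    }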
diff --git a/src/input_common/drivers/tas_input.h b/src/input_common/drivers/tas_input.h
index 38a27a230..5be66d142 100644
--- a/src/input_common/drivers/tas_input.h
+++ b/src/input_common/drivers/tas_input.h
@@ -124,7 +124,7 @@ public:
124 * Current playback progress ; 124 * Current playback progress ;
125 * Total length of script file currently loaded or being recorded 125 * Total length of script file currently loaded or being recorded
126 */ 126 */
127 std::tuple<TasState, size_t, size_t> GetStatus() const; 127 std::tuple<TasState, size_t, std::array<size_t, PLAYER_NUMBER>> GetStatus() const;
128 128
129private: 129private:
130 enum class TasAxis : u8; 130 enum class TasAxis : u8;
diff --git a/src/input_common/helpers/stick_from_buttons.cpp b/src/input_common/helpers/stick_from_buttons.cpp
index 82aa6ac2f..f3a0b3419 100644
--- a/src/input_common/helpers/stick_from_buttons.cpp
+++ b/src/input_common/helpers/stick_from_buttons.cpp
@@ -13,11 +13,11 @@ class Stick final : public Common::Input::InputDevice {
13public: 13public:
14 using Button = std::unique_ptr<Common::Input::InputDevice>; 14 using Button = std::unique_ptr<Common::Input::InputDevice>;
15 15
16 Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, 16 Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, Button updater_,
17 float modifier_scale_, float modifier_angle_) 17 float modifier_scale_, float modifier_angle_)
18 : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)), 18 : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)),
19 right(std::move(right_)), modifier(std::move(modifier_)), modifier_scale(modifier_scale_), 19 right(std::move(right_)), modifier(std::move(modifier_)), updater(std::move(updater_)),
20 modifier_angle(modifier_angle_) { 20 modifier_scale(modifier_scale_), modifier_angle(modifier_angle_) {
21 up->SetCallback({ 21 up->SetCallback({
22 .on_change = 22 .on_change =
23 [this](const Common::Input::CallbackStatus& callback_) { 23 [this](const Common::Input::CallbackStatus& callback_) {
@@ -48,6 +48,9 @@ public:
48 UpdateModButtonStatus(callback_); 48 UpdateModButtonStatus(callback_);
49 }, 49 },
50 }); 50 });
51 updater->SetCallback({
52 .on_change = [this](const Common::Input::CallbackStatus& callback_) { SoftUpdate(); },
53 });
51 last_x_axis_value = 0.0f; 54 last_x_axis_value = 0.0f;
52 last_y_axis_value = 0.0f; 55 last_y_axis_value = 0.0f;
53 } 56 }
@@ -248,7 +251,7 @@ public:
248 modifier->ForceUpdate(); 251 modifier->ForceUpdate();
249 } 252 }
250 253
251 void SoftUpdate() override { 254 void SoftUpdate() {
252 Common::Input::CallbackStatus status{ 255 Common::Input::CallbackStatus status{
253 .type = Common::Input::InputType::Stick, 256 .type = Common::Input::InputType::Stick,
254 .stick_status = GetStatus(), 257 .stick_status = GetStatus(),
@@ -308,6 +311,7 @@ private:
308 Button left; 311 Button left;
309 Button right; 312 Button right;
310 Button modifier; 313 Button modifier;
314 Button updater;
311 float modifier_scale{}; 315 float modifier_scale{};
312 float modifier_angle{}; 316 float modifier_angle{};
313 float angle{}; 317 float angle{};
@@ -331,11 +335,12 @@ std::unique_ptr<Common::Input::InputDevice> StickFromButton::Create(
331 auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine)); 335 auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine));
332 auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine)); 336 auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine));
333 auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine)); 337 auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine));
338 auto updater = Common::Input::CreateInputDeviceFromString("engine:updater,button:0");
334 auto modifier_scale = params.Get("modifier_scale", 0.5f); 339 auto modifier_scale = params.Get("modifier_scale", 0.5f);
335 auto modifier_angle = params.Get("modifier_angle", 5.5f); 340 auto modifier_angle = params.Get("modifier_angle", 5.5f);
336 return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left), 341 return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left),
337 std::move(right), std::move(modifier), modifier_scale, 342 std::move(right), std::move(modifier), std::move(updater),
338 modifier_angle); 343 modifier_scale, modifier_angle);
339} 344}
340 345
341} // namespace InputCommon 346} // namespace InputCommon
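The virtual stick no longer overrides SoftUpdate() for the frontend to poll; instead a hidden "updater" button, mapped to a dummy engine, flips its state every frame so the stick's on_change callback fires once per pump (the engine itself appears in input_common/main.cpp below). A reduced model of that wiring, with simplified types:

    #include <functional>
    #include <vector>

    // Sketch: one state flip per pump guarantees one on_change per frame.
    class UpdatePump {
    public:
        void Subscribe(std::function<void()> on_change) {
            callbacks.push_back(std::move(on_change));
        }
        void PumpEvents() {
            last_state = !last_state; // the value is irrelevant; the edge is the event
            for (const auto& cb : callbacks) {
                cb(); // Stick::SoftUpdate() re-emits the current stick status
            }
        }
    private:
        std::vector<std::function<void()>> callbacks;
        bool last_state{};
    };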
diff --git a/src/input_common/input_mapping.cpp b/src/input_common/input_mapping.cpp
index edd5287c1..d6e49d2c5 100644
--- a/src/input_common/input_mapping.cpp
+++ b/src/input_common/input_mapping.cpp
@@ -76,7 +76,7 @@ void MappingFactory::RegisterButton(const MappingData& data) {
76 break; 76 break;
77 case EngineInputType::Analog: 77 case EngineInputType::Analog:
78 // Ignore mouse axis when mapping buttons 78 // Ignore mouse axis when mapping buttons
79 if (data.engine == "mouse") { 79 if (data.engine == "mouse" && data.index != 4) {
80 return; 80 return;
81 } 81 }
82 new_input.Set("axis", data.index); 82 new_input.Set("axis", data.index);
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 4dc92f482..e0b2131ed 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -28,6 +28,28 @@
28 28
29namespace InputCommon { 29namespace InputCommon {
30 30
31/// Dummy engine to get periodic updates
32class UpdateEngine final : public InputEngine {
33public:
34 explicit UpdateEngine(std::string input_engine_) : InputEngine(std::move(input_engine_)) {
35 PreSetController(identifier);
36 }
37
38 void PumpEvents() {
39 SetButton(identifier, 0, last_state);
40 last_state = !last_state;
41 }
42
43private:
44 static constexpr PadIdentifier identifier = {
45 .guid = Common::UUID{},
46 .port = 0,
47 .pad = 0,
48 };
49
50 bool last_state{};
51};
52
31struct InputSubsystem::Impl { 53struct InputSubsystem::Impl {
32 template <typename Engine> 54 template <typename Engine>
33 void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) { 55 void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) {
@@ -45,6 +67,7 @@ struct InputSubsystem::Impl {
45 void Initialize() { 67 void Initialize() {
46 mapping_factory = std::make_shared<MappingFactory>(); 68 mapping_factory = std::make_shared<MappingFactory>();
47 69
70 RegisterEngine("updater", update_engine);
48 RegisterEngine("keyboard", keyboard); 71 RegisterEngine("keyboard", keyboard);
49 RegisterEngine("mouse", mouse); 72 RegisterEngine("mouse", mouse);
50 RegisterEngine("touch", touch_screen); 73 RegisterEngine("touch", touch_screen);
@@ -74,6 +97,7 @@ struct InputSubsystem::Impl {
74 } 97 }
75 98
76 void Shutdown() { 99 void Shutdown() {
100 UnregisterEngine(update_engine);
77 UnregisterEngine(keyboard); 101 UnregisterEngine(keyboard);
78 UnregisterEngine(mouse); 102 UnregisterEngine(mouse);
79 UnregisterEngine(touch_screen); 103 UnregisterEngine(touch_screen);
@@ -252,6 +276,7 @@ struct InputSubsystem::Impl {
252 } 276 }
253 277
254 void PumpEvents() const { 278 void PumpEvents() const {
279 update_engine->PumpEvents();
255#ifdef HAVE_SDL2 280#ifdef HAVE_SDL2
256 sdl->PumpEvents(); 281 sdl->PumpEvents();
257#endif 282#endif
@@ -263,6 +288,7 @@ struct InputSubsystem::Impl {
263 288
264 std::shared_ptr<MappingFactory> mapping_factory; 289 std::shared_ptr<MappingFactory> mapping_factory;
265 290
291 std::shared_ptr<UpdateEngine> update_engine;
266 std::shared_ptr<Keyboard> keyboard; 292 std::shared_ptr<Keyboard> keyboard;
267 std::shared_ptr<Mouse> mouse; 293 std::shared_ptr<Mouse> mouse;
268 std::shared_ptr<TouchScreen> touch_screen; 294 std::shared_ptr<TouchScreen> touch_screen;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index db9c94ce8..0cd87a48f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
321 case IR::Attribute::PositionY: 321 case IR::Attribute::PositionY:
322 case IR::Attribute::PositionZ: 322 case IR::Attribute::PositionZ:
323 case IR::Attribute::PositionW: 323 case IR::Attribute::PositionW:
324 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, 324 return ctx.OpLoad(
325 ctx.Const(element))); 325 ctx.F32[1],
326 ctx.need_input_position_indirect
327 ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
328 ctx.Const(element))
329 : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
326 case IR::Attribute::InstanceId: 330 case IR::Attribute::InstanceId:
327 if (ctx.profile.support_vertex_instance_id) { 331 if (ctx.profile.support_vertex_instance_id) {
328 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); 332 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 2c90f2368..c5db19d09 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); 58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
59} 59}
60 60
61Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { 61Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
62 const Id thirty_two{ctx.Const(32u)}; 62 const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
63 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; 63 const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
64 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; 64 return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
65 return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
66} 65}
67} // Anonymous namespace 66} // Anonymous namespace
68 67
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
145Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 144Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
146 Id segmentation_mask) { 145 Id segmentation_mask) {
147 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; 146 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
148 const Id thread_id{GetThreadId(ctx)}; 147 const Id thread_id{EmitLaneId(ctx)};
149 if (ctx.profile.warp_size_potentially_larger_than_guest) {
150 const Id thirty_two{ctx.Const(32u)};
151 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
152 const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
153 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
154 index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
155 clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
156 }
157 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; 148 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
158 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; 149 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
159 150
160 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; 151 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
161 const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; 152 Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
162 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 153 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
163 154
155 if (ctx.profile.warp_size_potentially_larger_than_guest) {
156 src_thread_id = AddPartitionBase(ctx, src_thread_id);
157 }
158
164 SetInBoundsFlag(inst, in_range); 159 SetInBoundsFlag(inst, in_range);
165 return SelectValue(ctx, in_range, value, src_thread_id); 160 return SelectValue(ctx, in_range, value, src_thread_id);
166} 161}
167 162
168Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 163Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
169 Id segmentation_mask) { 164 Id segmentation_mask) {
170 const Id thread_id{GetThreadId(ctx)}; 165 const Id thread_id{EmitLaneId(ctx)};
171 if (ctx.profile.warp_size_potentially_larger_than_guest) {
172 clamp = GetUpperClamp(ctx, thread_id, clamp);
173 }
174 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 166 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
175 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; 167 Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
176 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 168 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
177 169
170 if (ctx.profile.warp_size_potentially_larger_than_guest) {
171 src_thread_id = AddPartitionBase(ctx, src_thread_id);
172 }
173
178 SetInBoundsFlag(inst, in_range); 174 SetInBoundsFlag(inst, in_range);
179 return SelectValue(ctx, in_range, value, src_thread_id); 175 return SelectValue(ctx, in_range, value, src_thread_id);
180} 176}
181 177
182Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 178Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
183 Id segmentation_mask) { 179 Id segmentation_mask) {
184 const Id thread_id{GetThreadId(ctx)}; 180 const Id thread_id{EmitLaneId(ctx)};
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 clamp = GetUpperClamp(ctx, thread_id, clamp);
187 }
188 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 181 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
189 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; 182 Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
190 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 183 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
191 184
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 src_thread_id = AddPartitionBase(ctx, src_thread_id);
187 }
188
192 SetInBoundsFlag(inst, in_range); 189 SetInBoundsFlag(inst, in_range);
193 return SelectValue(ctx, in_range, value, src_thread_id); 190 return SelectValue(ctx, in_range, value, src_thread_id);
194} 191}
195 192
196Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 193Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
197 Id segmentation_mask) { 194 Id segmentation_mask) {
198 const Id thread_id{GetThreadId(ctx)}; 195 const Id thread_id{EmitLaneId(ctx)};
199 if (ctx.profile.warp_size_potentially_larger_than_guest) {
200 clamp = GetUpperClamp(ctx, thread_id, clamp);
201 }
202 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 196 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
203 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; 197 Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
204 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 198 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
205 199
200 if (ctx.profile.warp_size_potentially_larger_than_guest) {
201 src_thread_id = AddPartitionBase(ctx, src_thread_id);
202 }
203
206 SetInBoundsFlag(inst, in_range); 204 SetInBoundsFlag(inst, in_range);
207 return SelectValue(ctx, in_range, value, src_thread_id); 205 return SelectValue(ctx, in_range, value, src_thread_id);
208} 206}
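On hosts whose subgroups are wider than the guest's 32-lane warps (for example 64-wide waves), the shuffle helpers now compute the source lane within a 32-thread partition and then add the partition base back, rather than patching index and clamp up front. A scalar model of AddPartitionBase:

    #include <cstdint>

    // Scalar model of the SPIR-V above. host_thread_id is the host subgroup
    // invocation id; src_lane is the guest-visible source lane (0..31).
    uint32_t AddPartitionBase(uint32_t host_thread_id, uint32_t src_lane) {
        const uint32_t partition_index = host_thread_id >> 5; // which 32-wide partition
        const uint32_t partition_base = partition_index << 5; // 0 or 32 on a 64-wide wave
        return src_lane + partition_base;
    }

    // Example: host thread 37 (upper partition) shuffling from guest lane 3
    // reads host thread 35, so the shuffle never crosses partitions.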
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index ecb2db494..a0c155fdb 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
544 U16 = Name(TypeInt(16, false), "u16"); 544 U16 = Name(TypeInt(16, false), "u16");
545 S16 = Name(TypeInt(16, true), "s16"); 545 S16 = Name(TypeInt(16, true), "s16");
546 } 546 }
547 if (info.uses_int64) { 547 if (info.uses_int64 && profile.support_int64) {
548 AddCapability(spv::Capability::Int64); 548 AddCapability(spv::Capability::Int64);
549 U64 = Name(TypeInt(64, false), "u64"); 549 U64 = Name(TypeInt(64, false), "u64");
550 } 550 }
@@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
721 size_t label_index{0}; 721 size_t label_index{0};
722 if (info.loads.AnyComponent(IR::Attribute::PositionX)) { 722 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
723 AddLabel(labels[label_index]); 723 AddLabel(labels[label_index]);
724 const Id pointer{is_array 724 const Id pointer{[&]() {
725 ? OpAccessChain(input_f32, input_position, vertex, masked_index) 725 if (need_input_position_indirect) {
726 : OpAccessChain(input_f32, input_position, masked_index)}; 726 if (is_array)
727 return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
728 masked_index);
729 else
730 return OpAccessChain(input_f32, input_position, u32_zero_value,
731 masked_index);
732 } else {
733 if (is_array)
734 return OpAccessChain(input_f32, input_position, vertex, masked_index);
735 else
736 return OpAccessChain(input_f32, input_position, masked_index);
737 }
738 }()};
727 const Id result{OpLoad(F32[1], pointer)}; 739 const Id result{OpLoad(F32[1], pointer)};
728 OpReturnValue(result); 740 OpReturnValue(result);
729 ++label_index; 741 ++label_index;
@@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1367 Decorate(layer, spv::Decoration::Flat); 1379 Decorate(layer, spv::Decoration::Flat);
1368 } 1380 }
1369 if (loads.AnyComponent(IR::Attribute::PositionX)) { 1381 if (loads.AnyComponent(IR::Attribute::PositionX)) {
1370 const bool is_fragment{stage != Stage::Fragment}; 1382 const bool is_fragment{stage == Stage::Fragment};
1371 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; 1383 if (!is_fragment && profile.has_broken_spirv_position_input) {
1372 input_position = DefineInput(*this, F32[4], true, built_in); 1384 need_input_position_indirect = true;
1373 if (profile.support_geometry_shader_passthrough) { 1385
1374 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { 1386 const Id input_position_struct = TypeStruct(F32[4]);
1375 Decorate(input_position, spv::Decoration::PassthroughNV); 1387 input_position = DefineInput(*this, input_position_struct, true);
1388
1389 MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
1390 static_cast<unsigned>(spv::BuiltIn::Position));
1391 Decorate(input_position_struct, spv::Decoration::Block);
1392 } else {
1393 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
1394 : spv::BuiltIn::Position};
1395 input_position = DefineInput(*this, F32[4], true, built_in);
1396
1397 if (profile.support_geometry_shader_passthrough) {
1398 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
1399 Decorate(input_position, spv::Decoration::PassthroughNV);
1400 }
1376 } 1401 }
1377 } 1402 }
1378 } 1403 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 4414a5169..dbc5c55b9 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -280,6 +280,7 @@ public:
280 Id write_global_func_u32x2{}; 280 Id write_global_func_u32x2{};
281 Id write_global_func_u32x4{}; 281 Id write_global_func_u32x4{};
282 282
283 bool need_input_position_indirect{};
283 Id input_position{}; 284 Id input_position{};
284 std::array<Id, 32> input_generics{}; 285 std::array<Id, 32> input_generics{};
285 286
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index ac159d24b..a42453e90 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
171 } 171 }
172 return mapping; 172 return mapping;
173} 173}
174
175void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
176 const Shader::VaryingState& passthrough_mask,
177 bool passthrough_position,
178 std::optional<IR::Attribute> passthrough_layer_attr) {
179 for (u32 i = 0; i < program.output_vertices; i++) {
180 // Assign generics from input
181 for (u32 j = 0; j < 32; j++) {
182 if (!passthrough_mask.Generic(j)) {
183 continue;
184 }
185
186 const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
187 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
188 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
189 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
190 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
191 }
192
193 if (passthrough_position) {
194 // Assign position from input
195 const IR::Attribute attr = IR::Attribute::PositionX;
196 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
197 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
198 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
199 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
200 }
201
202 if (passthrough_layer_attr) {
203 // Assign layer
204 ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
205 ir.Imm32(0));
206 }
207
208 // Emit vertex
209 ir.EmitVertex(ir.Imm32(0));
210 }
211 ir.EndPrimitive(ir.Imm32(0));
212}
213
214u32 GetOutputTopologyVertices(OutputTopology output_topology) {
215 switch (output_topology) {
216 case OutputTopology::PointList:
217 return 1;
218 case OutputTopology::LineStrip:
219 return 2;
220 default:
221 return 3;
222 }
223}
224
225void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
226 for (IR::Block* const block : program.blocks) {
227 for (IR::Inst& inst : block->Instructions()) {
228 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
229 IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
230 EmitGeometryPassthrough(
231 ir, program, program.info.passthrough,
232 program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
233 }
234 }
235 }
236}
237
174} // Anonymous namespace 238} // Anonymous namespace
175 239
176IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 240IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
195 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; 259 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
196 if (program.is_geometry_passthrough) { 260 if (program.is_geometry_passthrough) {
197 const auto& mask{env.GpPassthroughMask()}; 261 const auto& mask{env.GpPassthroughMask()};
198 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { 262 for (size_t i = 0; i < mask.size() * 32; ++i) {
199 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; 263 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
200 } 264 }
265
266 if (!host_info.support_geometry_shader_passthrough) {
267 program.output_vertices = GetOutputTopologyVertices(program.output_topology);
268 LowerGeometryPassthrough(program, host_info);
269 }
201 } 270 }
202 break; 271 break;
203 } 272 }
@@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
223 292
224 Optimization::PositionPass(env, program); 293 Optimization::PositionPass(env, program);
225 294
226 Optimization::GlobalMemoryToStorageBufferPass(program); 295 Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
227 Optimization::TexturePass(env, program, host_info); 296 Optimization::TexturePass(env, program, host_info);
228 297
229 if (Settings::values.resolution_info.active) { 298 if (Settings::values.resolution_info.active) {
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
342 IR::Program program; 411 IR::Program program;
343 program.stage = Stage::Geometry; 412 program.stage = Stage::Geometry;
344 program.output_topology = output_topology; 413 program.output_topology = output_topology;
345 switch (output_topology) { 414 program.output_vertices = GetOutputTopologyVertices(output_topology);
346 case OutputTopology::PointList:
347 program.output_vertices = 1;
348 break;
349 case OutputTopology::LineStrip:
350 program.output_vertices = 2;
351 break;
352 default:
353 program.output_vertices = 3;
354 break;
355 }
356 415
357 program.is_geometry_passthrough = false; 416 program.is_geometry_passthrough = false;
358 program.info.loads.mask = source_program.info.stores.mask; 417 program.info.loads.mask = source_program.info.stores.mask;
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
366 node.data.block = current_block; 425 node.data.block = current_block;
367 426
368 IR::IREmitter ir{*current_block}; 427 IR::IREmitter ir{*current_block};
369 for (u32 i = 0; i < program.output_vertices; i++) { 428 EmitGeometryPassthrough(ir, program, program.info.stores, true,
370 // Assign generics from input 429 source_program.info.emulated_layer);
371 for (u32 j = 0; j < 32; j++) {
372 if (!program.info.stores.Generic(j)) {
373 continue;
374 }
375
376 const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
377 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
378 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
379 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
380 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
381 }
382
383 // Assign position from input
384 const IR::Attribute attr = IR::Attribute::PositionX;
385 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
386 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
387 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
388 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
389
390 // Assign layer
391 ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
392 ir.Imm32(0));
393
394 // Emit vertex
395 ir.EmitVertex(ir.Imm32(0));
396 }
397 ir.EndPrimitive(ir.Imm32(0));
398 430
399 IR::Block* return_block{block_pool.Create(inst_pool)}; 431 IR::Block* return_block{block_pool.Create(inst_pool)};
400 IR::IREmitter{*return_block}.Epilogue(); 432 IR::IREmitter{*return_block}.Epilogue();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index d5d279554..55fc48768 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -15,6 +15,9 @@ struct HostTranslateInfo {
15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered 15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers 16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS 17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
18 u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
19 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
20 ///< passthrough shaders
18}; 21};
19 22
20} // namespace Shader 23} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 336338e62..9101722ba 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
11#include "shader_recompiler/frontend/ir/breadth_first_search.h" 11#include "shader_recompiler/frontend/ir/breadth_first_search.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/host_translate_info.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
16namespace Shader::Optimization { 17namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
402} 403}
403 404
404/// Returns the offset in indices (not bytes) for an equivalent storage instruction 405/// Returns the offset in indices (not bytes) for an equivalent storage instruction
405IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { 406IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
406 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; 407 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
407 IR::U32 offset; 408 IR::U32 offset;
408 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { 409 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
415 } 416 }
416 // Subtract the least significant 32 bits from the guest offset. The result is the storage 417 // Subtract the least significant 32 bits from the guest offset. The result is the storage
417 // buffer offset in bytes. 418 // buffer offset in bytes.
418 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; 419 IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
420
421 // Align the offset base to match the host alignment requirements
422 low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
419 return ir.ISub(offset, low_cbuf); 423 return ir.ISub(offset, low_cbuf);
420} 424}
421 425
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
510} 514}
511} // Anonymous namespace 515} // Anonymous namespace
512 516
513void GlobalMemoryToStorageBufferPass(IR::Program& program) { 517void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
514 StorageInfo info; 518 StorageInfo info;
515 for (IR::Block* const block : program.post_order_blocks) { 519 for (IR::Block* const block : program.post_order_blocks) {
516 for (IR::Inst& inst : block->Instructions()) { 520 for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
-        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+        const IR::U32 offset{
+            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
         Replace(*block, *inst, index, offset);
     }
 }
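
Note on the alignment step above: masking with ~(alignment - 1U) is the standard align-down trick and assumes the host SSBO alignment is a power of two. A minimal scalar sketch of the same computation (hypothetical helper, not part of this change):

    #include <cassert>
    #include <cstdint>

    // Align-down for power-of-two alignments, mirroring the IR emitted by
    // StorageOffset(): low_cbuf & ~(alignment - 1U).
    std::uint32_t AlignDownPow2(std::uint32_t value, std::uint32_t alignment) {
        assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
        return value & ~(alignment - 1U);
    }

    // Example: AlignDownPow2(0x1234, 0x100) == 0x1200; ISub() then yields an
    // offset relative to the aligned base instead of the raw guest base.
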
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..4ffad1172 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -15,7 +15,7 @@ namespace Shader::Optimization {
 void CollectShaderInfoPass(Environment& env, IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
 void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index b8841a536..253e0d0bd 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -55,6 +55,8 @@ struct Profile {
 
     /// OpFClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
+    /// The Position builtin needs to be wrapped in a struct when used as an input
+    bool has_broken_spirv_position_input{};
     /// Offset image operands with an unsigned type do not work
     bool has_broken_unsigned_image_offsets{};
     /// Signed instructions with unsigned data types are misinterpreted
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 44236b6b1..f93181e1e 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -65,6 +65,8 @@ enum class Interpolation {
 struct ConstantBufferDescriptor {
     u32 index;
     u32 count;
+
+    auto operator<=>(const ConstantBufferDescriptor&) const = default;
 };
 
 struct StorageBufferDescriptor {
@@ -72,6 +74,8 @@ struct StorageBufferDescriptor {
     u32 cbuf_offset;
     u32 count;
     bool is_written;
+
+    auto operator<=>(const StorageBufferDescriptor&) const = default;
 };
 
 struct TextureBufferDescriptor {
@@ -84,6 +88,8 @@ struct TextureBufferDescriptor {
     u32 secondary_shift_left;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const TextureBufferDescriptor&) const = default;
 };
 using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
 
@@ -95,6 +101,8 @@ struct ImageBufferDescriptor {
     u32 cbuf_offset;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const ImageBufferDescriptor&) const = default;
 };
 using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
 
@@ -110,6 +118,8 @@ struct TextureDescriptor {
     u32 secondary_shift_left;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const TextureDescriptor&) const = default;
 };
 using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
 
@@ -122,6 +132,8 @@ struct ImageDescriptor {
     u32 cbuf_offset;
     u32 count;
     u32 size_shift;
+
+    auto operator<=>(const ImageDescriptor&) const = default;
 };
 using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
 
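
For context: defaulting operator<=> gives each descriptor member-wise equality and ordering in declaration order, presumably so whole descriptor lists can be compared without hand-written comparators. A minimal sketch of the idiom with a hypothetical struct:

    #include <compare>
    #include <cstdint>

    struct ExampleDescriptor {
        std::uint32_t index;
        std::uint32_t count;

        // Derives ==, <, <=, >, >= member-wise, in declaration order.
        auto operator<=>(const ExampleDescriptor&) const = default;
    };

    static_assert(ExampleDescriptor{1, 2} == ExampleDescriptor{1, 2});
    static_assert(ExampleDescriptor{1, 2} < ExampleDescriptor{1, 3});
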
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index f7236afab..5cd0628f2 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
     int num = 0;
     buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
     buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
-    REQUIRE(num == 0);
+    REQUIRE(num == 1);
     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
     REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
     buffer.FlushCachedWrites();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f9001104c..b474eb363 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -85,6 +85,7 @@ add_library(video_core STATIC
     gpu.h
     gpu_thread.cpp
     gpu_thread.h
+    invalidation_accumulator.h
     memory_manager.cpp
     memory_manager.h
     precompiled_headers.h
@@ -192,6 +193,8 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.cpp
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_texture_cache_base.cpp
+    renderer_vulkan/vk_turbo_mode.cpp
+    renderer_vulkan/vk_turbo_mode.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
     shader_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 92d77eef2..c47b7d866 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
         if (query_begin >= SizeBytes() || size < 0) {
             return;
         }
-        u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
         u64* const state_words = Array<type>();
         const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
         u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
                 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
             }
             // Exclude CPU modified pages when visiting GPU pages
-            const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
+            const u64 word = current_word;
             u64 page = page_begin;
             page_begin = 0;
 
@@ -531,7 +531,7 @@ private:
     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
 
-        const u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
         const u64* const state_words = Array<type>();
         const u64 num_query_words = size / BYTES_PER_WORD + 1;
         const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
         const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
         u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
         for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
-            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
-            const u64 word = state_words[word_index] & ~off_word;
+            const u64 word = state_words[word_index];
             if (word == 0) {
                 continue;
             }
@@ -564,7 +563,7 @@ private:
     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
 
-        const u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
         const u64* const state_words = Array<type>();
         const u64 num_query_words = size / BYTES_PER_WORD + 1;
         const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
         u64 begin = std::numeric_limits<u64>::max();
         u64 end = 0;
         for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
-            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
-            const u64 word = state_words[word_index] & ~off_word;
+            const u64 word = state_words[word_index];
             if (word == 0) {
                 continue;
             }
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 06fd40851..627917ab6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
                                                                       bool is_written) const {
     const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
     const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    const u32 alignment = runtime.GetStorageBufferAlignment();
+
+    const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
+    const u32 aligned_size =
+        Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
+
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
     if (!cpu_addr || size == 0) {
         return NULL_BINDING;
     }
-    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
+
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
     const Binding binding{
         .cpu_addr = *cpu_addr,
-        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
+        .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
         .buffer_id = BufferId{},
     };
     return binding;
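
A worked example of the new binding math, assuming a 256-byte host alignment: a guest SSBO at gpu_addr 0x1040 with size 0x30 is widened to an aligned binding at 0x1000 of size 0x100 (the shader-side StorageOffset pass compensates for the shifted base):

    #include <cstdint>

    // Stand-ins for Common::AlignDown/AlignUp (power-of-two alignment assumed).
    constexpr std::uint64_t AlignDown(std::uint64_t v, std::uint64_t a) {
        return v & ~(a - 1);
    }
    constexpr std::uint64_t AlignUp(std::uint64_t v, std::uint64_t a) {
        return AlignDown(v + a - 1, a);
    }

    constexpr std::uint64_t gpu_addr = 0x1040;
    constexpr std::uint32_t size = 0x30;
    constexpr std::uint64_t aligned_gpu_addr = AlignDown(gpu_addr, 0x100); // 0x1000
    constexpr std::uint32_t aligned_size = static_cast<std::uint32_t>(
        AlignUp((gpu_addr - aligned_gpu_addr) + size, 0x100)); // 0x100

    static_assert(aligned_gpu_addr == 0x1000 && aligned_size == 0x100);
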
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index cea1dd8b0..7f5a0c29d 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
                               regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
                               x_elements, regs.line_count, regs.dest.BlockHeight(),
                               regs.dest.BlockDepth(), regs.line_length_in);
-        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+        memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
     }
 }
 
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index e655e7254..a126c359c 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/sw_blitter/blitter.h" 8#include "video_core/engines/sw_blitter/blitter.h"
9#include "video_core/memory_manager.h"
9#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
10#include "video_core/surface.h" 11#include "video_core/surface.h"
11#include "video_core/textures/decoders.h" 12#include "video_core/textures/decoders.h"
@@ -20,8 +21,8 @@ namespace Tegra::Engines {
 
 using namespace Texture;
 
-Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
-    sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
+Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
+    sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
     // Nvidia's OpenGL driver seems to assume these values
     regs.src.depth = 1;
     regs.dst.depth = 1;
@@ -104,6 +105,7 @@ void Fermi2D::Blit() {
         config.src_x0 = 0;
     }
 
+    memory_manager.FlushCaching();
     if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
         sw_blitter->Blit(src, regs.dst, config);
     }
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 523fbdec2..705b323e1 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -305,6 +305,7 @@ public:
 private:
     VideoCore::RasterizerInterface* rasterizer = nullptr;
     std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
+    MemoryManager& memory_manager;
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a0555ef3f..ae9da6290 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -468,7 +468,7 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }
 
 void Maxwell3D::ProcessFirmwareCall4() {
-    LOG_WARNING(HW_GPU, "(STUBBED) called");
+    LOG_DEBUG(HW_GPU, "(STUBBED) called");
 
     // Firmware call 4 is a blob that changes some registers depending on its parameters.
     // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
@@ -486,11 +486,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
 }
 
 void Maxwell3D::ProcessQueryGet() {
-    // TODO(Subv): Support the other query units.
-    if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
-        LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
-    }
-
     switch (regs.report_semaphore.query.operation) {
     case Regs::ReportSemaphore::Operation::Release:
         if (regs.report_semaphore.query.short_query != 0) {
@@ -650,7 +645,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
 
     const GPUVAddr address{buffer_address + regs.const_buffer.offset};
     const size_t copy_size = amount * sizeof(u32);
-    memory_manager.WriteBlock(address, start_base, copy_size);
+    memory_manager.WriteBlockCached(address, start_base, copy_size);
 
     // Increment the current buffer position.
     regs.const_buffer.offset += static_cast<u32>(copy_size);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 01f70ea9e..7762c7d96 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
     if (launch.multi_line_enable) {
         const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
         const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
+        memory_manager.FlushCaching();
         if (!is_src_pitch && !is_dst_pitch) {
             // If both the source and the destination are in block layout, assert.
             CopyBlockLinearToBlockLinear();
@@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
                                            reinterpret_cast<u8*>(tmp_buffer.data()),
                                            regs.line_length_in * sizeof(u32));
         } else {
+            memory_manager.FlushCaching();
             const auto convert_linear_2_blocklinear_addr = [](u64 address) {
                 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
                        ((address & 0x180) >> 1) | ((address & 0x20) << 3);
@@ -121,8 +122,8 @@ void MaxwellDMA::Launch() {
                 memory_manager.ReadBlockUnsafe(
                     convert_linear_2_blocklinear_addr(regs.offset_in + offset),
                     tmp_buffer.data(), tmp_buffer.size());
-                memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
-                                          tmp_buffer.size());
+                memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
+                                                tmp_buffer.size());
             }
         } else if (is_src_pitch && !is_dst_pitch) {
             UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
             for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
                 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
                                                tmp_buffer.size());
-                memory_manager.WriteBlock(
+                memory_manager.WriteBlockCached(
                     convert_linear_2_blocklinear_addr(regs.offset_out + offset),
                     tmp_buffer.data(), tmp_buffer.size());
             }
@@ -141,8 +142,8 @@ void MaxwellDMA::Launch() {
             std::vector<u8> tmp_buffer(regs.line_length_in);
             memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
                                            regs.line_length_in);
-            memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
-                                      regs.line_length_in);
+            memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
+                                            regs.line_length_in);
         }
     }
 }
@@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
                       src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
                       regs.pitch_out);
 
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
                     dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
                     regs.pitch_in);
 
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::FastCopyBlockLinearToPitch() {
@@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
                       regs.src_params.block_size.height, regs.src_params.block_size.depth,
                       regs.pitch_out);
 
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
                   dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
                   dst.block_size.height, dst.block_size.depth, pitch);
 
-    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
+    memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
 
 void MaxwellDMA::ReleaseSemaphore() {
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 1a7961cb9..e968ae220 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -47,6 +47,7 @@ set(SHADER_FILES
     vulkan_present_scaleforce_fp16.frag
     vulkan_present_scaleforce_fp32.frag
     vulkan_quad_indexed.comp
+    vulkan_turbo_mode.comp
     vulkan_uint8.comp
 )
 
diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp
new file mode 100644
index 000000000..d651001d9
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp
@@ -0,0 +1,29 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 460 core
+
+layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
+
+layout (binding = 0) buffer ThreadData {
+    uint data[];
+};
+
+uint xorshift32(uint x) {
+    x ^= x << 13;
+    x ^= x >> 17;
+    x ^= x << 5;
+    return x;
+}
+
+uint getGlobalIndex() {
+    return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_WorkGroupSize.y * gl_NumWorkGroups.y;
+}
+
+void main() {
+    uint myIndex = xorshift32(getGlobalIndex());
+    uint otherIndex = xorshift32(myIndex);
+
+    uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1;
+    atomicAdd(data[myIndex % data.length()], otherValue);
+}
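
The shader above is a synthetic load: every invocation hashes its global index through xorshift32 and keeps the queue busy with dependent atomics. For reference, a host-side C++ twin of the PRNG step (a sketch, not part of the change):

    #include <cstdint>

    // Marsaglia xorshift32 (13/17/5 variant), identical to the GLSL above.
    // Note that a zero seed maps to zero, so seeds should be non-zero.
    constexpr std::uint32_t Xorshift32(std::uint32_t x) {
        x ^= x << 13;
        x ^= x >> 17;
        x ^= x << 5;
        return x;
    }

    static_assert(Xorshift32(1) == 270369u);
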
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h
new file mode 100644
index 000000000..2c2aaf7bb
--- /dev/null
+++ b/src/video_core/invalidation_accumulator.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class InvalidationAccumulator {
+public:
+    InvalidationAccumulator() = default;
+    ~InvalidationAccumulator() = default;
+
+    void Add(GPUVAddr address, size_t size) {
+        const auto reset_values = [&]() {
+            if (has_collected) {
+                buffer.emplace_back(start_address, accumulated_size);
+            }
+            start_address = address;
+            accumulated_size = size;
+            last_collection = start_address + size;
+        };
+        if (address >= start_address && address + size <= last_collection) [[likely]] {
+            return;
+        }
+        size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
+        address = address & atomicity_mask;
+        if (!has_collected) [[unlikely]] {
+            reset_values();
+            has_collected = true;
+            return;
+        }
+        if (address != last_collection) [[unlikely]] {
+            reset_values();
+            return;
+        }
+        accumulated_size += size;
+        last_collection += size;
+    }
+
+    void Clear() {
+        buffer.clear();
+        start_address = 0;
+        last_collection = 0;
+        has_collected = false;
+    }
+
+    bool AnyAccumulated() const {
+        return has_collected;
+    }
+
+    template <typename Func>
+    void Callback(Func&& func) {
+        if (!has_collected) {
+            return;
+        }
+        buffer.emplace_back(start_address, accumulated_size);
+        for (auto& [address, size] : buffer) {
+            func(address, size);
+        }
+    }
+
+private:
+    static constexpr size_t atomicity_bits = 5;
+    static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
+    static constexpr size_t atomicity_size_mask = atomicity_size - 1;
+    static constexpr size_t atomicity_mask = ~atomicity_size_mask;
+    GPUVAddr start_address{};
+    GPUVAddr last_collection{};
+    size_t accumulated_size{};
+    bool has_collected{};
+    std::vector<std::pair<VAddr, size_t>> buffer;
+};
+
+} // namespace VideoCommon
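
Intended driving pattern for this class (a usage sketch; the real call sites are MemoryManager::WriteBlockCached and FlushCaching below): contiguous writes coalesce into one range, rounded to the 32-byte atomicity granule, and a gap starts a new range.

    #include <cstddef>
    #include <cstdio>

    #include "video_core/invalidation_accumulator.h"

    void Example() {
        VideoCommon::InvalidationAccumulator accumulator;
        accumulator.Add(0x1000, 0x20); // starts [0x1000, 0x1020)
        accumulator.Add(0x1020, 0x20); // extends to [0x1000, 0x1040)
        accumulator.Add(0x9000, 0x10); // gap: second range, padded to 0x20
        if (accumulator.AnyAccumulated()) {
            accumulator.Callback([](GPUVAddr address, std::size_t size) {
                std::printf("invalidate %llx +%zx\n",
                            static_cast<unsigned long long>(address), size);
            });
            accumulator.Clear();
        }
    }
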
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index a5476e795..6272a4652 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -50,38 +50,6 @@ protected:
50 Maxwell3D& maxwell3d; 50 Maxwell3D& maxwell3d;
51}; 51};
52 52
53class HLE_DrawArrays final : public HLEMacroImpl {
54public:
55 explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
56
57 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
58 maxwell3d.RefreshParameters();
59
60 auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
61 maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2],
62 maxwell3d.regs.global_base_instance_index, 1);
63 }
64};
65
66class HLE_DrawIndexed final : public HLEMacroImpl {
67public:
68 explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
69
70 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
71 maxwell3d.RefreshParameters();
72 maxwell3d.regs.index_buffer.start_addr_high = parameters[1];
73 maxwell3d.regs.index_buffer.start_addr_low = parameters[2];
74 maxwell3d.regs.index_buffer.format =
75 static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]);
76 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
77
78 auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
79 maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4],
80 maxwell3d.regs.global_base_vertex_index,
81 maxwell3d.regs.global_base_instance_index, 1);
82 }
83};
84
85/* 53/*
86 * @note: these macros have two versions, a normal and extended version, with the extended version 54 * @note: these macros have two versions, a normal and extended version, with the extended version
87 * also assigning the base vertex/instance. 55 * also assigning the base vertex/instance.
@@ -497,11 +465,6 @@ public:
 } // Anonymous namespace
 
 HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
-    builders.emplace(0xDD6A7FA92A7D2674ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
-                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
-                             return std::make_unique<HLE_DrawArrays>(maxwell3d__);
-                         }));
     builders.emplace(0x0D61FC9FAAC9FCADULL,
                      std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
                          [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
@@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
512 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { 475 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
513 return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); 476 return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
514 })); 477 }));
515 builders.emplace(0x2DB33AADB741839CULL,
516 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
517 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
518 return std::make_unique<HLE_DrawIndexed>(maxwell3d__);
519 }));
520 builders.emplace(0x771BB18C62444DA0ULL, 478 builders.emplace(0x771BB18C62444DA0ULL,
521 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( 479 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
522 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { 480 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3a5cdeb39..3bcae3503 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -6,11 +6,13 @@
6#include "common/alignment.h" 6#include "common/alignment.h"
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/settings.h"
9#include "core/core.h" 10#include "core/core.h"
10#include "core/device_memory.h" 11#include "core/device_memory.h"
11#include "core/hle/kernel/k_page_table.h" 12#include "core/hle/kernel/k_page_table.h"
12#include "core/hle/kernel/k_process.h" 13#include "core/hle/kernel/k_process.h"
13#include "core/memory.h" 14#include "core/memory.h"
15#include "video_core/invalidation_accumulator.h"
14#include "video_core/memory_manager.h" 16#include "video_core/memory_manager.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16#include "video_core/renderer_base.h" 18#include "video_core/renderer_base.h"
@@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
       entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
                                            page_bits != big_page_bits ? page_bits : 0},
       kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
-                                      1, std::memory_order_acq_rel)} {
+                                      1, std::memory_order_acq_rel)},
+      accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
     address_space_size = 1ULL << address_space_bits;
     page_size = 1ULL << page_bits;
     page_mask = page_size - 1ULL;
@@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
     big_page_table_cpu.resize(big_page_table_size);
     big_page_continous.resize(big_page_table_size / continous_bits, 0);
     entries.resize(page_table_size / 32, 0);
+    if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
+        fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
+    } else {
+        fastmem_arena = nullptr;
+    }
 }
 
 MemoryManager::~MemoryManager() = default;
@@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
     }
-    const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
-
-    for (const auto& [map_addr, map_size] : submapped_ranges) {
-        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
-        ASSERT(cpu_addr);
+    GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
 
-        rasterizer->UnmapMemory(*cpu_addr, map_size);
+    for (const auto& [map_addr, map_size] : page_stash) {
+        rasterizer->UnmapMemory(map_addr, map_size);
     }
+    page_stash.clear();
 
     BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
     PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
@@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
     }
 }
 
-template <bool is_safe>
+template <bool is_safe, bool use_fastmem>
 void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
                                   [[maybe_unused]] VideoCommon::CacheType which) const {
     auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
@@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
         if constexpr (is_safe) {
             rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
         }
-        u8* physical = memory.GetPointer(cpu_addr_base);
-        std::memcpy(dest_buffer, physical, copy_amount);
+        if constexpr (use_fastmem) {
+            std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
+        } else {
+            u8* physical = memory.GetPointer(cpu_addr_base);
+            std::memcpy(dest_buffer, physical, copy_amount);
+        }
         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
     };
     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
         if constexpr (is_safe) {
             rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
         }
-        if (!IsBigPageContinous(page_index)) [[unlikely]] {
-            memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        if constexpr (use_fastmem) {
+            std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
         } else {
-            u8* physical = memory.GetPointer(cpu_addr_base);
-            std::memcpy(dest_buffer, physical, copy_amount);
+            if (!IsBigPageContinous(page_index)) [[unlikely]] {
+                memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+            } else {
+                u8* physical = memory.GetPointer(cpu_addr_base);
+                std::memcpy(dest_buffer, physical, copy_amount);
+            }
         }
         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
     };
@@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
 
 void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
                               VideoCommon::CacheType which) const {
-    ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
+    if (fastmem_arena) [[likely]] {
+        ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
+        return;
+    }
+    ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
 }
 
 void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
                                     const std::size_t size) const {
-    ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
+    if (fastmem_arena) [[likely]] {
+        ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
+        return;
+    }
+    ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
 }
 
 template <bool is_safe>
@@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
     WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
 }
 
+void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                                     std::size_t size) {
+    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
+    accumulator->Add(gpu_dest_addr, size);
+}
+
 void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
                                 VideoCommon::CacheType which) const {
     auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
@@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
 std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
     GPUVAddr gpu_addr, std::size_t size) const {
     std::vector<std::pair<GPUVAddr, std::size_t>> result{};
-    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+    GetSubmappedRangeImpl<true>(gpu_addr, size, result);
+    return result;
+}
+
+template <bool is_gpu_address>
+void MemoryManager::GetSubmappedRangeImpl(
+    GPUVAddr gpu_addr, std::size_t size,
+    std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
+        result) const {
+    std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
+        last_segment{};
     std::optional<VAddr> old_page_addr{};
     const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
                                                 [[maybe_unused]] std::size_t offset,
@@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
         }
         old_page_addr = {cpu_addr_base + copy_amount};
         if (!last_segment) {
-            const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
-            last_segment = {new_base_addr, copy_amount};
+            if constexpr (is_gpu_address) {
+                const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
+                last_segment = {new_base_addr, copy_amount};
+            } else {
+                last_segment = {cpu_addr_base, copy_amount};
+            }
         } else {
             last_segment->second += copy_amount;
         }
@@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
         }
         old_page_addr = {cpu_addr_base + copy_amount};
         if (!last_segment) {
-            const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
-            last_segment = {new_base_addr, copy_amount};
+            if constexpr (is_gpu_address) {
+                const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
+                last_segment = {new_base_addr, copy_amount};
+            } else {
+                last_segment = {cpu_addr_base, copy_amount};
+            }
         } else {
             last_segment->second += copy_amount;
         }
@@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
     };
     MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
     split(0, 0, 0);
-    return result;
+}
+
+void MemoryManager::FlushCaching() {
+    if (!accumulator->AnyAccumulated()) {
+        return;
+    }
+    accumulator->Callback([this](GPUVAddr addr, size_t size) {
+        GetSubmappedRangeImpl<false>(addr, size, page_stash);
+    });
+    rasterizer->InnerInvalidation(page_stash);
+    page_stash.clear();
+    accumulator->Clear();
 }
 
 } // namespace Tegra
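
Taken together: WriteBlockCached() performs the raw write and only records the dirty GPU range; the next FlushCaching() (called before draws, dispatches, and blits) converts those GPU ranges to CPU ranges via GetSubmappedRangeImpl<false> and batches them into one InnerInvalidation() call. The address-type switching relies on std::conditional_t; a minimal sketch of that piece:

    #include <cstddef>
    #include <cstdint>
    #include <type_traits>
    #include <utility>
    #include <vector>

    using GPUVAddr = std::uint64_t; // stand-ins for the common_types aliases
    using VAddr = std::uint64_t;

    // One template body can fill either a GPU-address or a CPU-address list.
    template <bool is_gpu_address>
    using RangeVector =
        std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>;

    static_assert(std::is_same_v<RangeVector<true>::value_type::first_type, GPUVAddr>);
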
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 828e13439..2936364f0 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -19,6 +19,10 @@ namespace VideoCore {
 class RasterizerInterface;
 }
 
+namespace VideoCommon {
+class InvalidationAccumulator;
+}
+
 namespace Core {
 class DeviceMemory;
 namespace Memory {
@@ -80,6 +84,7 @@ public:
      */
     void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+    void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
     /**
      * Checks if a gpu region can be simply read with a pointer.
@@ -129,12 +134,14 @@ public:
     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
                                size_t max_size = std::numeric_limits<size_t>::max()) const;
 
+    void FlushCaching();
+
 private:
     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
                                 FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
 
-    template <bool is_safe>
+    template <bool is_safe, bool use_fastmem>
     void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
                        VideoCommon::CacheType which) const;
 
@@ -154,6 +161,12 @@ private:
     inline bool IsBigPageContinous(size_t big_page_index) const;
     inline void SetBigPageContinous(size_t big_page_index, bool value);
 
+    template <bool is_gpu_address>
+    void GetSubmappedRangeImpl(
+        GPUVAddr gpu_addr, std::size_t size,
+        std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
+            result) const;
+
     Core::System& system;
     Core::Memory::Memory& memory;
     Core::DeviceMemory& device_memory;
@@ -201,10 +214,13 @@ private:
     Common::VirtualBuffer<u32> big_page_table_cpu;
 
     std::vector<u64> big_page_continous;
+    std::vector<std::pair<VAddr, std::size_t>> page_stash{};
+    u8* fastmem_arena{};
 
     constexpr static size_t continous_bits = 64;
 
     const size_t unique_identifier;
+    std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
 
     static std::atomic<size_t> unique_identifier_generator;
 };
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f980b12c6..33e2610bc 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
 #include <functional>
 #include <optional>
 #include <span>
+#include <utility>
 #include "common/common_types.h"
 #include "common/polyfill_thread.h"
 #include "video_core/cache_types.h"
@@ -98,6 +99,12 @@ public:
     virtual void InvalidateRegion(VAddr addr, u64 size,
                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
 
+    virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
+        for (const auto& [cpu_addr, size] : sequences) {
+            InvalidateRegion(cpu_addr, size);
+        }
+    }
+
     /// Notify rasterizer that any caches of the specified region are desync with guest
     virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..bb1962073 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -160,6 +160,10 @@ public:
         return device.CanReportMemoryUsage();
     }
 
+    u32 GetStorageBufferAlignment() const {
+        return static_cast<u32>(device.GetShaderStorageBufferAlignment());
+    }
+
 private:
     static constexpr std::array PABO_LUT{
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 651608a06..7bced675c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -140,6 +140,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
 void RasterizerOpenGL::Clear(u32 layer_count) {
     MICROPROFILE_SCOPE(OpenGL_Clears);
 
+    gpu_memory->FlushCaching();
     const auto& regs = maxwell3d->regs;
     bool use_color{};
     bool use_depth{};
@@ -208,6 +209,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
 
     SCOPE_EXIT({ gpu.TickWork(); });
+    gpu_memory->FlushCaching();
     query_cache.UpdateCounters();
 
     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
@@ -361,6 +363,7 @@ void RasterizerOpenGL::DrawTexture() {
 }
 
 void RasterizerOpenGL::DispatchCompute() {
+    gpu_memory->FlushCaching();
     ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
     if (!pipeline) {
         return;
@@ -568,6 +571,7 @@ void RasterizerOpenGL::TickFrame() {
 }
 
 bool RasterizerOpenGL::AccelerateConditionalRendering() {
+    gpu_memory->FlushCaching();
     if (Settings::IsGPULevelHigh()) {
         // Reimplement Host conditional rendering.
         return false;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 03b6314ff..7dd854e0f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .needs_demote_reorder = device.IsAmd(),
           .support_snorm_render_buffer = false,
           .support_viewport_index_layer = device.HasVertexViewportLayer(),
+          .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
+          .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
       } {
     if (use_asynchronous_shaders) {
         workers = CreateWorkers();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bc75680f0..de95f2634 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -442,7 +442,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
 
     glBindTextureUnit(0, screen_info.display_texture);
 
-    const auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
+    auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
+    if (anti_aliasing > Settings::AntiAliasing::LastAA) {
+        LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing);
+        anti_aliasing = Settings::AntiAliasing::None;
+        Settings::values.anti_aliasing.SetValue(anti_aliasing);
+    }
+
     if (anti_aliasing != Settings::AntiAliasing::None) {
         glEnablei(GL_SCISSOR_TEST, 0);
         auto viewport_width = screen_info.texture.width;
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 3d328a250..f8398b511 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -148,7 +148,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe
         });
     }
     if (!extended_dynamic_state_2_extra) {
-        dynamic_state.Refresh2(regs, topology, extended_dynamic_state_2);
+        dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2);
     }
     if (!extended_dynamic_state_3_blend) {
         if (maxwell3d.dirty.flags[Dirty::Blending]) {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index f502a7d09..2a8d9e377 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -60,24 +60,13 @@ std::string GetDriverVersion(const Device& device) {
     return GetReadableVersion(version);
 }
 
-std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
-    std::sort(std::begin(available_extensions), std::end(available_extensions));
-
-    static constexpr std::size_t AverageExtensionSize = 64;
-    std::string separated_extensions;
-    separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
-
-    const auto end = std::end(available_extensions);
-    for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
-        if (const bool is_last = extension + 1 == end; is_last) {
-            separated_extensions += *extension;
-        } else {
-            separated_extensions += fmt::format("{},", *extension);
-        }
-    }
-    return separated_extensions;
+std::string BuildCommaSeparatedExtensions(
+    const std::set<std::string, std::less<>>& available_extensions) {
+    return fmt::format("{}", fmt::join(available_extensions, ","));
 }
 
+} // Anonymous namespace
+
 Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
                     VkSurfaceKHR surface) {
     const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
@@ -89,7 +78,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
89 const vk::PhysicalDevice physical_device(devices[device_index], dld); 78 const vk::PhysicalDevice physical_device(devices[device_index], dld);
90 return Device(*instance, physical_device, surface, dld); 79 return Device(*instance, physical_device, surface, dld);
91} 80}
92} // Anonymous namespace
93 81
94RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 82RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
95 Core::Frontend::EmuWindow& emu_window, 83 Core::Frontend::EmuWindow& emu_window,
@@ -98,7 +86,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
98 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), 86 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
99 cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), 87 cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
100 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, 88 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
101 true, Settings::values.renderer_debug.GetValue())), 89 Settings::values.renderer_debug.GetValue())),
102 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), 90 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
103 surface(CreateSurface(instance, render_window)), 91 surface(CreateSurface(instance, render_window)),
104 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), 92 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
@@ -109,6 +97,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
109 screen_info), 97 screen_info),
110 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, 98 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
111 state_tracker, scheduler) { 99 state_tracker, scheduler) {
100 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
101 turbo_mode.emplace(instance, dld);
102 scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
103 }
112 Report(); 104 Report();
113} catch (const vk::Exception& exception) { 105} catch (const vk::Exception& exception) {
114 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); 106 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
@@ -116,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
116} 108}
117 109
118RendererVulkan::~RendererVulkan() { 110RendererVulkan::~RendererVulkan() {
111 scheduler.RegisterOnSubmit([] {});
119 void(device.GetLogical().WaitIdle()); 112 void(device.GetLogical().WaitIdle());
120} 113}
121 114
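One detail worth noting across these hunks: the destructor registers an empty callback before waiting on the device. The lambda installed in the constructor captures this and unconditionally dereferences turbo_mode, so it must be detached before the optional it touches goes away. A reduced sketch of that lifetime concern, with simplified stand-ins for Scheduler and TurboMode:

#include <functional>
#include <optional>

struct Scheduler {
    std::function<void()> on_submit;
    void RegisterOnSubmit(std::function<void()>&& f) { on_submit = std::move(f); }
    void Submit() {
        if (on_submit) {
            on_submit();
        }
    }
};

struct TurboMode {
    void QueueSubmitted() {}
};

struct Renderer {
    Scheduler scheduler;
    std::optional<TurboMode> turbo_mode;

    Renderer() {
        turbo_mode.emplace();
        // Unconditional dereference: safe only while turbo_mode is engaged.
        scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
    }
    ~Renderer() {
        // Detach the callback first; a submit racing with teardown must not
        // touch the soon-to-be-destroyed optional.
        scheduler.RegisterOnSubmit([] {});
    }
};

int main() {
    Renderer renderer;
    renderer.scheduler.Submit();
}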
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index e7bfecb20..009e75e0d 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -13,6 +13,7 @@
13#include "video_core/renderer_vulkan/vk_scheduler.h" 13#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h" 14#include "video_core/renderer_vulkan/vk_state_tracker.h"
15#include "video_core/renderer_vulkan/vk_swapchain.h" 15#include "video_core/renderer_vulkan/vk_swapchain.h"
16#include "video_core/renderer_vulkan/vk_turbo_mode.h"
16#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
17#include "video_core/vulkan_common/vulkan_memory_allocator.h" 18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -31,6 +32,9 @@ class GPU;
31 32
32namespace Vulkan { 33namespace Vulkan {
33 34
35Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
36 VkSurfaceKHR surface);
37
34class RendererVulkan final : public VideoCore::RendererBase { 38class RendererVulkan final : public VideoCore::RendererBase {
35public: 39public:
36 explicit RendererVulkan(Core::TelemetrySession& telemtry_session, 40 explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -74,6 +78,7 @@ private:
74 Swapchain swapchain; 78 Swapchain swapchain;
75 BlitScreen blit_screen; 79 BlitScreen blit_screen;
76 RasterizerVulkan rasterizer; 80 RasterizerVulkan rasterizer;
81 std::optional<TurboMode> turbo_mode;
77}; 82};
78 83
79} // namespace Vulkan 84} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 487d8b416..1cfb4c2ff 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -330,12 +330,19 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
330 return device.CanReportMemoryUsage(); 330 return device.CanReportMemoryUsage();
331} 331}
332 332
333u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
334 return static_cast<u32>(device.GetStorageBufferAlignment());
335}
336
333void BufferCacheRuntime::Finish() { 337void BufferCacheRuntime::Finish() {
334 scheduler.Finish(); 338 scheduler.Finish();
335} 339}
336 340
337void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, 341void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
338 std::span<const VideoCommon::BufferCopy> copies, bool barrier) { 342 std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
343 if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
344 return;
345 }
339 static constexpr VkMemoryBarrier READ_BARRIER{ 346 static constexpr VkMemoryBarrier READ_BARRIER{
340 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 347 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
341 .pNext = nullptr, 348 .pNext = nullptr,
@@ -394,6 +401,9 @@ void BufferCacheRuntime::PostCopyBarrier() {
394} 401}
395 402
396void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) { 403void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
404 if (dest_buffer == VK_NULL_HANDLE) {
405 return;
406 }
397 static constexpr VkMemoryBarrier READ_BARRIER{ 407 static constexpr VkMemoryBarrier READ_BARRIER{
398 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 408 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
399 .pNext = nullptr, 409 .pNext = nullptr,
@@ -473,6 +483,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
473 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); 483 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
474 }); 484 });
475 } else { 485 } else {
486 if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
487 ReserveNullBuffer();
488 buffer = *null_buffer;
489 offset = 0;
490 }
476 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { 491 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
477 cmdbuf.BindVertexBuffer(index, buffer, offset); 492 cmdbuf.BindVertexBuffer(index, buffer, offset);
478 }); 493 });
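All three changes in this file defend against VK_NULL_HANDLE: CopyBuffer and ClearBuffer now skip null buffers outright, and the vertex-buffer path falls back to a reserved dummy buffer when the robustness2 nullDescriptor feature is absent. A condensed sketch of the fallback branch, with HasNullDescriptor and ReserveNullBuffer as simplified stand-ins for the runtime's helpers:

#include <vulkan/vulkan.h>

// Simplified stand-ins for the runtime's device query and cached dummy buffer.
struct DeviceInfo {
    bool null_descriptor_supported{};
    bool HasNullDescriptor() const { return null_descriptor_supported; }
};

static VkBuffer null_buffer = VK_NULL_HANDLE;

VkBuffer ReserveNullBuffer() {
    // Sketch: the real helper lazily creates a small valid VkBuffer once.
    return null_buffer;
}

void BindVertexBufferSafe(const DeviceInfo& device, VkBuffer buffer, VkDeviceSize offset) {
    if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
        // Binding VK_NULL_HANDLE is only legal with robustness2's
        // nullDescriptor feature; otherwise substitute a real dummy buffer.
        buffer = ReserveNullBuffer();
        offset = 0;
    }
    // ... record vkCmdBindVertexBuffers(cmdbuf, index, 1, &buffer, &offset) ...
    (void)buffer;
    (void)offset;
}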
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 183b33632..06539c733 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -73,6 +73,8 @@ public:
73 73
74 bool CanReportMemoryUsage() const; 74 bool CanReportMemoryUsage() const;
75 75
76 u32 GetStorageBufferAlignment() const;
77
76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 78 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
77 79
78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 80 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
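GetStorageBufferAlignment exposes the device's minStorageBufferOffsetAlignment so the generic buffer cache (and, further down, the shader recompiler via min_ssbo_alignment) can align SSBO bindings correctly. A sketch of the usual alignment arithmetic, assuming a power-of-two alignment as Vulkan guarantees:

#include <cassert>
#include <cstdint>

// Round an address down/up to a power-of-two alignment, as needed when
// turning a raw guest address range into a (buffer, offset, size) binding.
constexpr uint64_t AlignDown(uint64_t value, uint64_t alignment) {
    assert((alignment & (alignment - 1)) == 0);
    return value & ~(alignment - 1);
}

constexpr uint64_t AlignUp(uint64_t value, uint64_t alignment) {
    return AlignDown(value + alignment - 1, alignment);
}

static_assert(AlignDown(0x1234, 0x100) == 0x1200);
static_assert(AlignUp(0x1234, 0x100) == 0x1300);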
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 04a3a861e..2a0f0dbf0 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -24,13 +24,15 @@ using Shader::ImageBufferDescriptor;
24using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; 24using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
25using Tegra::Texture::TexturePair; 25using Tegra::Texture::TexturePair;
26 26
27ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, 27ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
28 DescriptorPool& descriptor_pool,
28 UpdateDescriptorQueue& update_descriptor_queue_, 29 UpdateDescriptorQueue& update_descriptor_queue_,
29 Common::ThreadWorker* thread_worker, 30 Common::ThreadWorker* thread_worker,
30 PipelineStatistics* pipeline_statistics, 31 PipelineStatistics* pipeline_statistics,
31 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, 32 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
32 vk::ShaderModule spv_module_) 33 vk::ShaderModule spv_module_)
33 : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, 34 : device{device_}, pipeline_cache(pipeline_cache_),
35 update_descriptor_queue{update_descriptor_queue_}, info{info_},
34 spv_module(std::move(spv_module_)) { 36 spv_module(std::move(spv_module_)) {
35 if (shader_notify) { 37 if (shader_notify) {
36 shader_notify->MarkShaderBuilding(); 38 shader_notify->MarkShaderBuilding();
@@ -56,23 +58,27 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
56 if (device.IsKhrPipelineExecutablePropertiesEnabled()) { 58 if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
57 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; 59 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
58 } 60 }
59 pipeline = device.GetLogical().CreateComputePipeline({ 61 pipeline = device.GetLogical().CreateComputePipeline(
60 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 62 {
61 .pNext = nullptr, 63 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
62 .flags = flags, 64 .pNext = nullptr,
63 .stage{ 65 .flags = flags,
64 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 66 .stage{
65 .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, 67 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
66 .flags = 0, 68 .pNext =
67 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 69 device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
68 .module = *spv_module, 70 .flags = 0,
69 .pName = "main", 71 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
70 .pSpecializationInfo = nullptr, 72 .module = *spv_module,
73 .pName = "main",
74 .pSpecializationInfo = nullptr,
75 },
76 .layout = *pipeline_layout,
77 .basePipelineHandle = 0,
78 .basePipelineIndex = 0,
71 }, 79 },
72 .layout = *pipeline_layout, 80 *pipeline_cache);
73 .basePipelineHandle = 0, 81
74 .basePipelineIndex = 0,
75 });
76 if (pipeline_statistics) { 82 if (pipeline_statistics) {
77 pipeline_statistics->Collect(*pipeline); 83 pipeline_statistics->Collect(*pipeline);
78 } 84 }
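Threading a shared pipeline cache through construction is what makes the new on-disk cache effective: every vkCreateComputePipelines / vkCreateGraphicsPipelines call can hit previously compiled binaries instead of recompiling. A minimal sketch of the integration point using raw Vulkan calls rather than yuzu's vk:: wrappers:

#include <vector>

#include <vulkan/vulkan.h>

// Create a pipeline cache, optionally seeded with a previously saved blob.
VkPipelineCache MakePipelineCache(VkDevice device, const std::vector<char>& seed) {
    const VkPipelineCacheCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .initialDataSize = seed.size(),
        .pInitialData = seed.empty() ? nullptr : seed.data(),
    };
    VkPipelineCache cache = VK_NULL_HANDLE;
    vkCreatePipelineCache(device, &ci, nullptr, &cache);
    return cache;
}

VkPipeline MakeComputePipeline(VkDevice device, VkPipelineCache cache,
                               const VkComputePipelineCreateInfo& ci) {
    VkPipeline pipeline = VK_NULL_HANDLE;
    // Passing the cache here is the whole integration point: cache hits
    // skip shader recompilation inside the driver.
    vkCreateComputePipelines(device, cache, 1, &ci, nullptr, &pipeline);
    return pipeline;
}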
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index d70837fc5..78d77027f 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -28,7 +28,8 @@ class Scheduler;
28 28
29class ComputePipeline { 29class ComputePipeline {
30public: 30public:
31 explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, 31 explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache,
32 DescriptorPool& descriptor_pool,
32 UpdateDescriptorQueue& update_descriptor_queue, 33 UpdateDescriptorQueue& update_descriptor_queue,
33 Common::ThreadWorker* thread_worker, 34 Common::ThreadWorker* thread_worker,
34 PipelineStatistics* pipeline_statistics, 35 PipelineStatistics* pipeline_statistics,
@@ -46,6 +47,7 @@ public:
46 47
47private: 48private:
48 const Device& device; 49 const Device& device;
50 vk::PipelineCache& pipeline_cache;
49 UpdateDescriptorQueue& update_descriptor_queue; 51 UpdateDescriptorQueue& update_descriptor_queue;
50 Shader::Info info; 52 Shader::Info info;
51 53
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index d11383bf1..f91bb5a1d 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -234,13 +234,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
234 234
235GraphicsPipeline::GraphicsPipeline( 235GraphicsPipeline::GraphicsPipeline(
236 Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, 236 Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
237 VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, 237 vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify,
238 const Device& device_, DescriptorPool& descriptor_pool,
238 UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, 239 UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
239 PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, 240 PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
240 const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, 241 const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
241 const std::array<const Shader::Info*, NUM_STAGES>& infos) 242 const std::array<const Shader::Info*, NUM_STAGES>& infos)
242 : key{key_}, device{device_}, texture_cache{texture_cache_}, 243 : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
243 buffer_cache{buffer_cache_}, scheduler{scheduler_}, 244 pipeline_cache(pipeline_cache_), scheduler{scheduler_},
244 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { 245 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
245 if (shader_notify) { 246 if (shader_notify) {
246 shader_notify->MarkShaderBuilding(); 247 shader_notify->MarkShaderBuilding();
@@ -644,12 +645,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
644 .pNext = nullptr, 645 .pNext = nullptr,
645 .flags = 0, 646 .flags = 0,
646 .topology = input_assembly_topology, 647 .topology = input_assembly_topology,
647 .primitiveRestartEnable = dynamic.primitive_restart_enable != 0 && 648 .primitiveRestartEnable =
648 ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && 649 dynamic.primitive_restart_enable != 0 &&
649 device.IsTopologyListPrimitiveRestartSupported()) || 650 ((input_assembly_topology != VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
650 SupportsPrimitiveRestart(input_assembly_topology) || 651 device.IsTopologyListPrimitiveRestartSupported()) ||
651 (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST && 652 SupportsPrimitiveRestart(input_assembly_topology) ||
652 device.IsPatchListPrimitiveRestartSupported())), 653 (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST &&
654 device.IsPatchListPrimitiveRestartSupported()))
655 ? VK_TRUE
656 : VK_FALSE,
653 }; 657 };
654 const VkPipelineTessellationStateCreateInfo tessellation_ci{ 658 const VkPipelineTessellationStateCreateInfo tessellation_ci{
655 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, 659 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
@@ -699,7 +703,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
699 .cullMode = static_cast<VkCullModeFlags>( 703 .cullMode = static_cast<VkCullModeFlags>(
700 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), 704 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
701 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), 705 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
702 .depthBiasEnable = (dynamic.depth_bias_enable == 0 ? VK_TRUE : VK_FALSE), 706 .depthBiasEnable = (dynamic.depth_bias_enable != 0 ? VK_TRUE : VK_FALSE),
703 .depthBiasConstantFactor = 0.0f, 707 .depthBiasConstantFactor = 0.0f,
704 .depthBiasClamp = 0.0f, 708 .depthBiasClamp = 0.0f,
705 .depthBiasSlopeFactor = 0.0f, 709 .depthBiasSlopeFactor = 0.0f,
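The depthBiasEnable change is a genuine bug fix, not a reformat: the old expression enabled depth bias exactly when the guest register said to disable it. Mapping register bits to VkBool32 through an explicit helper makes such inversions harder to write; a hypothetical example:

#include <cstdint>

#include <vulkan/vulkan.h>

// Hypothetical helper: a nonzero guest register bit means "enabled".
constexpr VkBool32 ToVkBool(uint32_t register_bit) {
    return register_bit != 0 ? VK_TRUE : VK_FALSE;
}

static_assert(ToVkBool(0) == VK_FALSE);
static_assert(ToVkBool(1) == VK_TRUE);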
@@ -894,27 +898,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
894 if (device.IsKhrPipelineExecutablePropertiesEnabled()) { 898 if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
895 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; 899 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
896 } 900 }
897 pipeline = device.GetLogical().CreateGraphicsPipeline({ 901 pipeline = device.GetLogical().CreateGraphicsPipeline(
898 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 902 {
899 .pNext = nullptr, 903 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
900 .flags = flags, 904 .pNext = nullptr,
901 .stageCount = static_cast<u32>(shader_stages.size()), 905 .flags = flags,
902 .pStages = shader_stages.data(), 906 .stageCount = static_cast<u32>(shader_stages.size()),
903 .pVertexInputState = &vertex_input_ci, 907 .pStages = shader_stages.data(),
904 .pInputAssemblyState = &input_assembly_ci, 908 .pVertexInputState = &vertex_input_ci,
905 .pTessellationState = &tessellation_ci, 909 .pInputAssemblyState = &input_assembly_ci,
906 .pViewportState = &viewport_ci, 910 .pTessellationState = &tessellation_ci,
907 .pRasterizationState = &rasterization_ci, 911 .pViewportState = &viewport_ci,
908 .pMultisampleState = &multisample_ci, 912 .pRasterizationState = &rasterization_ci,
909 .pDepthStencilState = &depth_stencil_ci, 913 .pMultisampleState = &multisample_ci,
910 .pColorBlendState = &color_blend_ci, 914 .pDepthStencilState = &depth_stencil_ci,
911 .pDynamicState = &dynamic_state_ci, 915 .pColorBlendState = &color_blend_ci,
912 .layout = *pipeline_layout, 916 .pDynamicState = &dynamic_state_ci,
913 .renderPass = render_pass, 917 .layout = *pipeline_layout,
914 .subpass = 0, 918 .renderPass = render_pass,
915 .basePipelineHandle = nullptr, 919 .subpass = 0,
916 .basePipelineIndex = 0, 920 .basePipelineHandle = nullptr,
917 }); 921 .basePipelineIndex = 0,
922 },
923 *pipeline_cache);
918} 924}
919 925
920void GraphicsPipeline::Validate() { 926void GraphicsPipeline::Validate() {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 1ed2967be..67c657d0e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -70,16 +70,14 @@ class GraphicsPipeline {
70 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; 70 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
71 71
72public: 72public:
73 explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache, 73 explicit GraphicsPipeline(
74 TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify, 74 Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
75 const Device& device, DescriptorPool& descriptor_pool, 75 vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify,
76 UpdateDescriptorQueue& update_descriptor_queue, 76 const Device& device, DescriptorPool& descriptor_pool,
77 Common::ThreadWorker* worker_thread, 77 UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread,
78 PipelineStatistics* pipeline_statistics, 78 PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
79 RenderPassCache& render_pass_cache, 79 const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
80 const GraphicsPipelineCacheKey& key, 80 const std::array<const Shader::Info*, NUM_STAGES>& infos);
81 std::array<vk::ShaderModule, NUM_STAGES> stages,
82 const std::array<const Shader::Info*, NUM_STAGES>& infos);
83 81
84 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; 82 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
85 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; 83 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -133,6 +131,7 @@ private:
133 const Device& device; 131 const Device& device;
134 TextureCache& texture_cache; 132 TextureCache& texture_cache;
135 BufferCache& buffer_cache; 133 BufferCache& buffer_cache;
134 vk::PipelineCache& pipeline_cache;
136 Scheduler& scheduler; 135 Scheduler& scheduler;
137 UpdateDescriptorQueue& update_descriptor_queue; 136 UpdateDescriptorQueue& update_descriptor_queue;
138 137
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3046b72ab..7e69b11d8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -55,6 +55,7 @@ using VideoCommon::GenericEnvironment;
55using VideoCommon::GraphicsEnvironment; 55using VideoCommon::GraphicsEnvironment;
56 56
57constexpr u32 CACHE_VERSION = 10; 57constexpr u32 CACHE_VERSION = 10;
58constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
58 59
59template <typename Container> 60template <typename Container>
60auto MakeSpan(Container& container) { 61auto MakeSpan(Container& container) {
@@ -284,6 +285,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
284 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, 285 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
285 texture_cache{texture_cache_}, shader_notify{shader_notify_}, 286 texture_cache{texture_cache_}, shader_notify{shader_notify_},
286 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, 287 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
288 use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
287 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), 289 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
288 serialization_thread(1, "VkPipelineSerialization") { 290 serialization_thread(1, "VkPipelineSerialization") {
289 const auto& float_control{device.FloatControlProperties()}; 291 const auto& float_control{device.FloatControlProperties()};
@@ -329,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
329 .need_declared_frag_colors = false, 331 .need_declared_frag_colors = false,
330 332
331 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, 333 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
334 .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
332 .has_broken_unsigned_image_offsets = false, 335 .has_broken_unsigned_image_offsets = false,
333 .has_broken_signed_operations = false, 336 .has_broken_signed_operations = false,
334 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, 337 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
@@ -341,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
341 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, 344 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
342 .support_snorm_render_buffer = true, 345 .support_snorm_render_buffer = true,
343 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), 346 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
347 .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
348 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
344 }; 349 };
345 350
346 if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { 351 if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
@@ -362,7 +367,12 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
362 }; 367 };
363} 368}
364 369
365PipelineCache::~PipelineCache() = default; 370PipelineCache::~PipelineCache() {
371 if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) {
372 SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
373 CACHE_VERSION);
374 }
375}
366 376
367GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { 377GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
368 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 378 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
@@ -418,6 +428,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
418 } 428 }
419 pipeline_cache_filename = base_dir / "vulkan.bin"; 429 pipeline_cache_filename = base_dir / "vulkan.bin";
420 430
431 if (use_vulkan_pipeline_cache) {
432 vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin";
433 vulkan_pipeline_cache =
434 LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION);
435 }
436
421 struct { 437 struct {
422 std::mutex mutex; 438 std::mutex mutex;
423 size_t total{}; 439 size_t total{};
@@ -496,6 +512,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
496 512
497 workers.WaitForRequests(stop_loading); 513 workers.WaitForRequests(stop_loading);
498 514
515 if (use_vulkan_pipeline_cache) {
516 SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
517 CACHE_VERSION);
518 }
519
499 if (state.statistics) { 520 if (state.statistics) {
500 state.statistics->Report(); 521 state.statistics->Report();
501 } 522 }
@@ -616,10 +637,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
616 previous_stage = &program; 637 previous_stage = &program;
617 } 638 }
618 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; 639 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
619 return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache, 640 return std::make_unique<GraphicsPipeline>(
620 &shader_notify, device, descriptor_pool, 641 scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
621 update_descriptor_queue, thread_worker, statistics, 642 descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
622 render_pass_cache, key, std::move(modules), infos); 643 std::move(modules), infos);
623 644
624} catch (const Shader::Exception& exception) { 645} catch (const Shader::Exception& exception) {
625 LOG_ERROR(Render_Vulkan, "{}", exception.what()); 646 LOG_ERROR(Render_Vulkan, "{}", exception.what());
@@ -689,13 +710,108 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
689 spv_module.SetObjectNameEXT(name.c_str()); 710 spv_module.SetObjectNameEXT(name.c_str());
690 } 711 }
691 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; 712 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
692 return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, 713 return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool,
693 thread_worker, statistics, &shader_notify, 714 update_descriptor_queue, thread_worker, statistics,
694 program.info, std::move(spv_module)); 715 &shader_notify, program.info, std::move(spv_module));
695 716
696} catch (const Shader::Exception& exception) { 717} catch (const Shader::Exception& exception) {
697 LOG_ERROR(Render_Vulkan, "{}", exception.what()); 718 LOG_ERROR(Render_Vulkan, "{}", exception.what());
698 return nullptr; 719 return nullptr;
699} 720}
700 721
722void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
723 const vk::PipelineCache& pipeline_cache,
724 u32 cache_version) try {
725 std::ofstream file(filename, std::ios::binary);
726 if (!file.is_open()) {
727 LOG_ERROR(Common_Filesystem, "Failed to open Vulkan driver pipeline cache file {}",
728 Common::FS::PathToUTF8String(filename));
729 return;
730 }
731 file.exceptions(std::ofstream::failbit);
732 file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size())
733 .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
734
735 size_t cache_size = 0;
736 std::vector<char> cache_data;
737 if (pipeline_cache) {
738 pipeline_cache.Read(&cache_size, nullptr);
739 cache_data.resize(cache_size);
740 pipeline_cache.Read(&cache_size, cache_data.data());
741 }
742 file.write(cache_data.data(), cache_size);
743
744 LOG_INFO(Render_Vulkan, "Vulkan driver pipelines cached at: {}",
745 Common::FS::PathToUTF8String(filename));
746
747} catch (const std::ios_base::failure& e) {
748 LOG_ERROR(Common_Filesystem, "{}", e.what());
749 if (!Common::FS::RemoveFile(filename)) {
750 LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}",
751 Common::FS::PathToUTF8String(filename));
752 }
753}
754
755vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::path& filename,
756 u32 expected_cache_version) {
757 const auto create_pipeline_cache = [this](size_t data_size, const void* data) {
758 VkPipelineCacheCreateInfo pipeline_cache_ci = {
759 .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
760 .pNext = nullptr,
761 .flags = 0,
762 .initialDataSize = data_size,
763 .pInitialData = data};
764 return device.GetLogical().CreatePipelineCache(pipeline_cache_ci);
765 };
766 try {
767 std::ifstream file(filename, std::ios::binary | std::ios::ate);
768 if (!file.is_open()) {
769 return create_pipeline_cache(0, nullptr);
770 }
771 file.exceptions(std::ifstream::failbit);
772 const auto end{file.tellg()};
773 file.seekg(0, std::ios::beg);
774
775 std::array<char, 8> magic_number;
776 u32 cache_version;
777 file.read(magic_number.data(), magic_number.size())
778 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
779 if (magic_number != VULKAN_CACHE_MAGIC_NUMBER || cache_version != expected_cache_version) {
780 file.close();
781 if (Common::FS::RemoveFile(filename)) {
782 if (magic_number != VULKAN_CACHE_MAGIC_NUMBER) {
783 LOG_ERROR(Common_Filesystem, "Invalid Vulkan driver pipeline cache file");
784 }
785 if (cache_version != expected_cache_version) {
786 LOG_INFO(Common_Filesystem, "Deleting old Vulkan driver pipeline cache");
787 }
788 } else {
789 LOG_ERROR(Common_Filesystem,
790 "Invalid Vulkan pipeline cache file and failed to delete it in \"{}\"",
791 Common::FS::PathToUTF8String(filename));
792 }
793 return create_pipeline_cache(0, nullptr);
794 }
795
796 static constexpr size_t header_size = magic_number.size() + sizeof(cache_version);
797 const size_t cache_size = static_cast<size_t>(end) - header_size;
798 std::vector<char> cache_data(cache_size);
799 file.read(cache_data.data(), cache_size);
800
801 LOG_INFO(Render_Vulkan,
802 "Loaded Vulkan driver pipeline cache: ", Common::FS::PathToUTF8String(filename));
803
804 return create_pipeline_cache(cache_size, cache_data.data());
805
806 } catch (const std::ios_base::failure& e) {
807 LOG_ERROR(Common_Filesystem, "{}", e.what());
808 if (!Common::FS::RemoveFile(filename)) {
809 LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}",
810 Common::FS::PathToUTF8String(filename));
811 }
812
813 return create_pipeline_cache(0, nullptr);
814 }
815}
816
701} // namespace Vulkan 817} // namespace Vulkan
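SerializeVulkanPipelineCache and LoadVulkanPipelineCache agree on a small container format: an 8-byte magic, a u32 cache version, then the raw blob the driver produced via vkGetPipelineCacheData; a mismatched magic or version simply deletes the file and starts fresh. A sketch of the writer side under those assumptions, using plain iostreams instead of yuzu's filesystem helpers:

#include <array>
#include <cstdint>
#include <fstream>
#include <vector>

constexpr std::array<char, 8> kMagic{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};

// Layout: [magic (8 bytes)][version (4 bytes, host endian)][driver blob].
// The version gate means any format bump invalidates old files on load.
void WriteCacheFile(const char* path, uint32_t version,
                    const std::vector<char>& driver_blob) {
    std::ofstream file(path, std::ios::binary);
    file.write(kMagic.data(), kMagic.size());
    file.write(reinterpret_cast<const char*>(&version), sizeof(version));
    file.write(driver_blob.data(),
               static_cast<std::streamsize>(driver_blob.size()));
}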
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index b4f593ef5..5171912d7 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -135,6 +135,12 @@ private:
135 PipelineStatistics* statistics, 135 PipelineStatistics* statistics,
136 bool build_in_parallel); 136 bool build_in_parallel);
137 137
138 void SerializeVulkanPipelineCache(const std::filesystem::path& filename,
139 const vk::PipelineCache& pipeline_cache, u32 cache_version);
140
141 vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
142 u32 expected_cache_version);
143
138 const Device& device; 144 const Device& device;
139 Scheduler& scheduler; 145 Scheduler& scheduler;
140 DescriptorPool& descriptor_pool; 146 DescriptorPool& descriptor_pool;
@@ -144,6 +150,7 @@ private:
144 TextureCache& texture_cache; 150 TextureCache& texture_cache;
145 VideoCore::ShaderNotify& shader_notify; 151 VideoCore::ShaderNotify& shader_notify;
146 bool use_asynchronous_shaders{}; 152 bool use_asynchronous_shaders{};
153 bool use_vulkan_pipeline_cache{};
147 154
148 GraphicsPipelineCacheKey graphics_key{}; 155 GraphicsPipelineCacheKey graphics_key{};
149 GraphicsPipeline* current_pipeline{}; 156 GraphicsPipeline* current_pipeline{};
@@ -158,6 +165,9 @@ private:
158 165
159 std::filesystem::path pipeline_cache_filename; 166 std::filesystem::path pipeline_cache_filename;
160 167
168 std::filesystem::path vulkan_pipeline_cache_filename;
169 vk::PipelineCache vulkan_pipeline_cache;
170
161 Common::ThreadWorker workers; 171 Common::ThreadWorker workers;
162 Common::ThreadWorker serialization_thread; 172 Common::ThreadWorker serialization_thread;
163 DynamicFeatures dynamic_features; 173 DynamicFeatures dynamic_features;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index e45512d4f..86ef0daeb 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
186 186
187 SCOPE_EXIT({ gpu.TickWork(); }); 187 SCOPE_EXIT({ gpu.TickWork(); });
188 FlushWork(); 188 FlushWork();
189 gpu_memory->FlushCaching();
189 190
190 query_cache.UpdateCounters(); 191 query_cache.UpdateCounters();
191 192
@@ -298,6 +299,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
298 MICROPROFILE_SCOPE(Vulkan_Clearing); 299 MICROPROFILE_SCOPE(Vulkan_Clearing);
299 300
300 FlushWork(); 301 FlushWork();
302 gpu_memory->FlushCaching();
301 303
302 query_cache.UpdateCounters(); 304 query_cache.UpdateCounters();
303 305
@@ -422,6 +424,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
422 424
423void RasterizerVulkan::DispatchCompute() { 425void RasterizerVulkan::DispatchCompute() {
424 FlushWork(); 426 FlushWork();
427 gpu_memory->FlushCaching();
425 428
426 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; 429 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
427 if (!pipeline) { 430 if (!pipeline) {
@@ -510,6 +513,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
510 } 513 }
511} 514}
512 515
516void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
517 {
518 std::scoped_lock lock{texture_cache.mutex};
519 for (const auto& [addr, size] : sequences) {
520 texture_cache.WriteMemory(addr, size);
521 }
522 }
523 {
524 std::scoped_lock lock{buffer_cache.mutex};
525 for (const auto& [addr, size] : sequences) {
526 buffer_cache.WriteMemory(addr, size);
527 }
528 }
529 {
530 for (const auto& [addr, size] : sequences) {
531 query_cache.InvalidateRegion(addr, size);
532 pipeline_cache.InvalidateRegion(addr, size);
533 }
534 }
535}
536
513void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { 537void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
514 if (addr == 0 || size == 0) { 538 if (addr == 0 || size == 0) {
515 return; 539 return;
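InnerInvalidation exists so a burst of small invalidations costs one lock acquisition per cache for the whole batch, where repeated InvalidateRegion calls would lock once per write. The shape of the optimization, with a generic mutex-guarded cache standing in for the texture and buffer caches:

#include <cstdint>
#include <mutex>
#include <span>
#include <utility>

struct Cache {
    std::mutex mutex;
    void WriteMemory(uint64_t addr, std::size_t size) {
        (void)addr;
        (void)size;  // sketch: invalidate [addr, addr + size)
    }
};

// One lock per cache for the whole batch, rather than one per region.
void InnerInvalidation(Cache& texture_cache, Cache& buffer_cache,
                       std::span<const std::pair<uint64_t, std::size_t>> sequences) {
    {
        std::scoped_lock lock{texture_cache.mutex};
        for (const auto& [addr, size] : sequences) {
            texture_cache.WriteMemory(addr, size);
        }
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        for (const auto& [addr, size] : sequences) {
            buffer_cache.WriteMemory(addr, size);
        }
    }
}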
@@ -634,6 +658,7 @@ void RasterizerVulkan::TickFrame() {
634} 658}
635 659
636bool RasterizerVulkan::AccelerateConditionalRendering() { 660bool RasterizerVulkan::AccelerateConditionalRendering() {
661 gpu_memory->FlushCaching();
637 if (Settings::IsGPULevelHigh()) { 662 if (Settings::IsGPULevelHigh()) {
638 // TODO(Blinkhawk): Reimplement Host conditional rendering. 663 // TODO(Blinkhawk): Reimplement Host conditional rendering.
639 return false; 664 return false;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index deb44dcaa..a0508b57c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -80,6 +80,7 @@ public:
80 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 80 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
81 void InvalidateRegion(VAddr addr, u64 size, 81 void InvalidateRegion(VAddr addr, u64 size,
82 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 82 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
83 void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
83 void OnCPUWrite(VAddr addr, u64 size) override; 84 void OnCPUWrite(VAddr addr, u64 size) override;
84 void InvalidateGPUCache() override; 85 void InvalidateGPUCache() override;
85 void UnmapMemory(VAddr addr, u64 size) override; 86 void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index c2e53a5d5..e03685af1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
213 .signalSemaphoreCount = num_signal_semaphores, 213 .signalSemaphoreCount = num_signal_semaphores,
214 .pSignalSemaphores = signal_semaphores.data(), 214 .pSignalSemaphores = signal_semaphores.data(),
215 }; 215 };
216
217 if (on_submit) {
218 on_submit();
219 }
220
216 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { 221 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
217 case VK_SUCCESS: 222 case VK_SUCCESS:
218 break; 223 break;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3858c506c..bd4cb0f7e 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -5,6 +5,7 @@
5 5
6#include <condition_variable> 6#include <condition_variable>
7#include <cstddef> 7#include <cstddef>
8#include <functional>
8#include <memory> 9#include <memory>
9#include <thread> 10#include <thread>
10#include <utility> 11#include <utility>
@@ -66,6 +67,11 @@ public:
66 query_cache = &query_cache_; 67 query_cache = &query_cache_;
67 } 68 }
68 69
70 // Registers a callback to be invoked on queue submission.
71 void RegisterOnSubmit(std::function<void()>&& func) {
72 on_submit = std::move(func);
73 }
74
69 /// Send work to a separate thread. 75 /// Send work to a separate thread.
70 template <typename T> 76 template <typename T>
71 void Record(T&& command) { 77 void Record(T&& command) {
@@ -216,6 +222,7 @@ private:
216 vk::CommandBuffer current_cmdbuf; 222 vk::CommandBuffer current_cmdbuf;
217 223
218 std::unique_ptr<CommandChunk> chunk; 224 std::unique_ptr<CommandChunk> chunk;
225 std::function<void()> on_submit;
219 226
220 State state; 227 State state;
221 228
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
new file mode 100644
index 000000000..c42594149
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -0,0 +1,222 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/literals.h"
5#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
6#include "video_core/renderer_vulkan/renderer_vulkan.h"
7#include "video_core/renderer_vulkan/vk_shader_util.h"
8#include "video_core/renderer_vulkan/vk_turbo_mode.h"
9#include "video_core/vulkan_common/vulkan_device.h"
10
11namespace Vulkan {
12
13using namespace Common::Literals;
14
15TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
16 : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
17 {
18 std::scoped_lock lk{m_submission_lock};
19 m_submission_time = std::chrono::steady_clock::now();
20 }
21 m_thread = std::jthread([&](auto stop_token) { Run(stop_token); });
22}
23
24TurboMode::~TurboMode() = default;
25
26void TurboMode::QueueSubmitted() {
27 std::scoped_lock lk{m_submission_lock};
28 m_submission_time = std::chrono::steady_clock::now();
29 m_submission_cv.notify_one();
30}
31
32void TurboMode::Run(std::stop_token stop_token) {
33 auto& dld = m_device.GetLogical();
34
35 // Allocate buffer. 2MiB should be sufficient.
36 auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
37 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
38 .pNext = nullptr,
39 .flags = 0,
40 .size = 2_MiB,
41 .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
42 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
43 .queueFamilyIndexCount = 0,
44 .pQueueFamilyIndices = nullptr,
45 });
46
47 // Commit some device local memory for the buffer.
48 auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
49
50 // Create the descriptor pool to contain our descriptor.
51 constexpr VkDescriptorPoolSize pool_size{
52 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
53 .descriptorCount = 1,
54 };
55
56 auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
57 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
58 .pNext = nullptr,
59 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
60 .maxSets = 1,
61 .poolSizeCount = 1,
62 .pPoolSizes = &pool_size,
63 });
64
65 // Create the descriptor set layout describing the single storage buffer binding.
66 constexpr VkDescriptorSetLayoutBinding layout_binding{
67 .binding = 0,
68 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
69 .descriptorCount = 1,
70 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
71 .pImmutableSamplers = nullptr,
72 };
73
74 auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
75 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
76 .pNext = nullptr,
77 .flags = 0,
78 .bindingCount = 1,
79 .pBindings = &layout_binding,
80 });
81
82 // Actually create the descriptor set.
83 auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
84 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
85 .pNext = nullptr,
86 .descriptorPool = *descriptor_pool,
87 .descriptorSetCount = 1,
88 .pSetLayouts = descriptor_set_layout.address(),
89 });
90
91 // Create the shader.
92 auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);
93
94 // Create the pipeline layout.
95 auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
96 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
97 .pNext = nullptr,
98 .flags = 0,
99 .setLayoutCount = 1,
100 .pSetLayouts = descriptor_set_layout.address(),
101 .pushConstantRangeCount = 0,
102 .pPushConstantRanges = nullptr,
103 });
104
105 // Actually create the pipeline.
106 const VkPipelineShaderStageCreateInfo shader_stage{
107 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
108 .pNext = nullptr,
109 .flags = 0,
110 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
111 .module = *shader,
112 .pName = "main",
113 .pSpecializationInfo = nullptr,
114 };
115
116 auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
117 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
118 .pNext = nullptr,
119 .flags = 0,
120 .stage = shader_stage,
121 .layout = *pipeline_layout,
122 .basePipelineHandle = VK_NULL_HANDLE,
123 .basePipelineIndex = 0,
124 });
125
126 // Create a fence to wait on.
127 auto fence = dld.CreateFence(VkFenceCreateInfo{
128 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
129 .pNext = nullptr,
130 .flags = 0,
131 });
132
133 // Create a command pool to allocate a command buffer from.
134 auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
135 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
136 .pNext = nullptr,
137 .flags =
138 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
139 .queueFamilyIndex = m_device.GetGraphicsFamily(),
140 });
141
142 // Create a single command buffer.
143 auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
144 auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
145
146 while (!stop_token.stop_requested()) {
147 // Reset the fence.
148 fence.Reset();
149
150 // Update descriptor set.
151 const VkDescriptorBufferInfo buffer_info{
152 .buffer = *buffer,
153 .offset = 0,
154 .range = VK_WHOLE_SIZE,
155 };
156
157 const VkWriteDescriptorSet buffer_write{
158 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
159 .pNext = nullptr,
160 .dstSet = descriptor_set[0],
161 .dstBinding = 0,
162 .dstArrayElement = 0,
163 .descriptorCount = 1,
164 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
165 .pImageInfo = nullptr,
166 .pBufferInfo = &buffer_info,
167 .pTexelBufferView = nullptr,
168 };
169
170 dld.UpdateDescriptorSets(std::array{buffer_write}, {});
171
172 // Set up the command buffer.
173 cmdbuf.Begin(VkCommandBufferBeginInfo{
174 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
175 .pNext = nullptr,
176 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
177 .pInheritanceInfo = nullptr,
178 });
179
180 // Clear the buffer.
181 cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0);
182
183 // Bind descriptor set.
184 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
185 descriptor_set, {});
186
187 // Bind the pipeline.
188 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
189
190 // Dispatch.
191 cmdbuf.Dispatch(64, 64, 1);
192
193 // Finish.
194 cmdbuf.End();
195
196 const VkSubmitInfo submit_info{
197 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
198 .pNext = nullptr,
199 .waitSemaphoreCount = 0,
200 .pWaitSemaphores = nullptr,
201 .pWaitDstStageMask = nullptr,
202 .commandBufferCount = 1,
203 .pCommandBuffers = cmdbuf.address(),
204 .signalSemaphoreCount = 0,
205 .pSignalSemaphores = nullptr,
206 };
207
208 m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence);
209
210 // Wait for completion.
211 fence.Wait();
212
213 // Wait for the next graphics queue submission if necessary.
214 std::unique_lock lk{m_submission_lock};
215 Common::CondvarWait(m_submission_cv, lk, stop_token, [this] {
216 return (std::chrono::steady_clock::now() - m_submission_time) <=
217 std::chrono::milliseconds{100};
218 });
219 }
220}
221
222} // namespace Vulkan
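The wait predicate at the bottom of Run is the core of turbo mode's duty cycle: the dummy dispatches keep the GPU busy only while a real queue submission happened within the last 100 ms, and the thread parks once the queue goes quiet. A reduced model of that activity window, using a plain std::condition_variable_any and std::jthread where the real code goes through the Common::CondvarWait polyfill:

#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

using namespace std::chrono_literals;

std::mutex submission_lock;
std::condition_variable_any submission_cv;
std::chrono::steady_clock::time_point submission_time{};

void QueueSubmitted() {
    std::scoped_lock lk{submission_lock};
    submission_time = std::chrono::steady_clock::now();
    submission_cv.notify_one();
}

void Run(std::stop_token stop_token) {
    while (!stop_token.stop_requested()) {
        // ... submit one padding compute dispatch and wait on its fence ...
        std::unique_lock lk{submission_lock};
        // Park until the last real submission is recent again, or stop.
        submission_cv.wait(lk, stop_token, [] {
            return std::chrono::steady_clock::now() - submission_time <= 100ms;
        });
    }
}

int main() {
    std::jthread worker{Run};
    QueueSubmitted();  // wakes the worker; it runs while submissions stay fresh
}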
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h
new file mode 100644
index 000000000..99b5ac50b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h
@@ -0,0 +1,35 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <chrono>
7#include <mutex>
8
9#include "common/polyfill_thread.h"
10#include "video_core/vulkan_common/vulkan_device.h"
11#include "video_core/vulkan_common/vulkan_memory_allocator.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13
14namespace Vulkan {
15
16class TurboMode {
17public:
18 explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld);
19 ~TurboMode();
20
21 void QueueSubmitted();
22
23private:
24 void Run(std::stop_token stop_token);
25
26 Device m_device;
27 MemoryAllocator m_allocator;
28 std::mutex m_submission_lock;
29 std::condition_variable_any m_submission_cv;
30 std::chrono::time_point<std::chrono::steady_clock> m_submission_time{};
31
32 std::jthread m_thread;
33};
34
35} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 5c5bfa18d..23d922e5d 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -74,30 +74,6 @@ enum class NvidiaArchitecture {
74 VoltaOrOlder, 74 VoltaOrOlder,
75}; 75};
76 76
77constexpr std::array REQUIRED_EXTENSIONS{
78 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
79 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
80#ifdef _WIN32
81 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
82#endif
83#ifdef __unix__
84 VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
85#endif
86};
87
88constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_2{
89 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
90 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
91 VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
92 VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
93 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
94 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
95};
96
97constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{
98 VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
99};
100
101template <typename T> 77template <typename T>
102void SetNext(void**& next, T& data) { 78void SetNext(void**& next, T& data) {
103 *next = &data; 79 *next = &data;
@@ -286,24 +262,9 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
286 return format_properties; 262 return format_properties;
287} 263}
288 264
289std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
290 const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
291 std::vector<std::string> supported_extensions;
292 supported_extensions.reserve(extensions.size());
293 for (const auto& extension : extensions) {
294 supported_extensions.emplace_back(extension.extensionName);
295 }
296 return supported_extensions;
297}
298
299bool IsExtensionSupported(std::span<const std::string> supported_extensions,
300 std::string_view extension) {
301 return std::ranges::find(supported_extensions, extension) != supported_extensions.end();
302}
303
304NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, 265NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
305 std::span<const std::string> exts) { 266 const std::set<std::string, std::less<>>& exts) {
306 if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { 267 if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
307 VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; 268 VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
308 shading_rate_props.sType = 269 shading_rate_props.sType =
309 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; 270 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
@@ -316,423 +277,55 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
316 return NvidiaArchitecture::AmpereOrNewer; 277 return NvidiaArchitecture::AmpereOrNewer;
317 } 278 }
318 } 279 }
319 if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { 280 if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) {
320 return NvidiaArchitecture::Turing; 281 return NvidiaArchitecture::Turing;
321 } 282 }
322 return NvidiaArchitecture::VoltaOrOlder; 283 return NvidiaArchitecture::VoltaOrOlder;
323} 284}
285
286std::vector<const char*> ExtensionListForVulkan(
287 const std::set<std::string, std::less<>>& extensions) {
288 std::vector<const char*> output;
289 for (const auto& extension : extensions) {
290 output.push_back(extension.c_str());
291 }
292 return output;
293}
294
324} // Anonymous namespace 295} // Anonymous namespace
325 296
326Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, 297Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
327 const vk::InstanceDispatch& dld_) 298 const vk::InstanceDispatch& dld_)
328 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 299 : instance{instance_}, dld{dld_}, physical{physical_},
329 instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions(
330 physical)},
331 format_properties(GetFormatProperties(physical)) { 300 format_properties(GetFormatProperties(physical)) {
332 CheckSuitability(surface != nullptr); 301 // Get suitability and device properties.
333 SetupFamilies(surface); 302 const bool is_suitable = GetSuitability(surface != nullptr);
334 SetupFeatures();
335 SetupProperties();
336
337 const auto queue_cis = GetDeviceQueueCreateInfos();
338 const std::vector extensions = LoadExtensions(surface != nullptr);
339
340 VkPhysicalDeviceFeatures2 features2{
341 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
-        .pNext = nullptr,
-        .features{
-            .robustBufferAccess = true,
-            .fullDrawIndexUint32 = false,
-            .imageCubeArray = true,
-            .independentBlend = true,
-            .geometryShader = true,
-            .tessellationShader = true,
-            .sampleRateShading = true,
-            .dualSrcBlend = true,
-            .logicOp = true,
-            .multiDrawIndirect = true,
-            .drawIndirectFirstInstance = true,
-            .depthClamp = true,
-            .depthBiasClamp = true,
-            .fillModeNonSolid = true,
-            .depthBounds = is_depth_bounds_supported,
-            .wideLines = true,
-            .largePoints = true,
-            .alphaToOne = false,
-            .multiViewport = true,
-            .samplerAnisotropy = true,
-            .textureCompressionETC2 = false,
-            .textureCompressionASTC_LDR = is_optimal_astc_supported,
-            .textureCompressionBC = false,
-            .occlusionQueryPrecise = true,
-            .pipelineStatisticsQuery = false,
-            .vertexPipelineStoresAndAtomics = true,
-            .fragmentStoresAndAtomics = true,
-            .shaderTessellationAndGeometryPointSize = false,
-            .shaderImageGatherExtended = true,
-            .shaderStorageImageExtendedFormats = false,
-            .shaderStorageImageMultisample = is_shader_storage_image_multisample,
-            .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
-            .shaderStorageImageWriteWithoutFormat = true,
-            .shaderUniformBufferArrayDynamicIndexing = false,
-            .shaderSampledImageArrayDynamicIndexing = false,
-            .shaderStorageBufferArrayDynamicIndexing = false,
-            .shaderStorageImageArrayDynamicIndexing = false,
-            .shaderClipDistance = true,
-            .shaderCullDistance = true,
-            .shaderFloat64 = is_shader_float64_supported,
-            .shaderInt64 = is_shader_int64_supported,
-            .shaderInt16 = is_shader_int16_supported,
-            .shaderResourceResidency = false,
-            .shaderResourceMinLod = false,
-            .sparseBinding = false,
-            .sparseResidencyBuffer = false,
-            .sparseResidencyImage2D = false,
-            .sparseResidencyImage3D = false,
-            .sparseResidency2Samples = false,
-            .sparseResidency4Samples = false,
-            .sparseResidency8Samples = false,
-            .sparseResidency16Samples = false,
-            .sparseResidencyAliased = false,
-            .variableMultisampleRate = false,
-            .inheritedQueries = false,
-        },
-    };
-    const void* first_next = &features2;
-    void** next = &features2.pNext;
-
-    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
-        .pNext = nullptr,
-        .timelineSemaphore = true,
-    };
-    SetNext(next, timeline_semaphore);
-
-    VkPhysicalDevice16BitStorageFeatures bit16_storage{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
-        .pNext = nullptr,
-        .storageBuffer16BitAccess = true,
-        .uniformAndStorageBuffer16BitAccess = true,
-        .storagePushConstant16 = false,
-        .storageInputOutput16 = false,
-    };
-    SetNext(next, bit16_storage);
-
-    VkPhysicalDevice8BitStorageFeatures bit8_storage{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
-        .pNext = nullptr,
-        .storageBuffer8BitAccess = true,
-        .uniformAndStorageBuffer8BitAccess = true,
-        .storagePushConstant8 = false,
-    };
-    SetNext(next, bit8_storage);
-
-    VkPhysicalDeviceRobustness2FeaturesEXT robustness2{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
-        .pNext = nullptr,
-        .robustBufferAccess2 = true,
-        .robustImageAccess2 = true,
-        .nullDescriptor = true,
-    };
-    SetNext(next, robustness2);
-
-    VkPhysicalDeviceHostQueryResetFeatures host_query_reset{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
-        .pNext = nullptr,
-        .hostQueryReset = true,
-    };
-    SetNext(next, host_query_reset);
-
-    VkPhysicalDeviceVariablePointerFeatures variable_pointers{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES,
-        .pNext = nullptr,
-        .variablePointersStorageBuffer = VK_TRUE,
-        .variablePointers = VK_TRUE,
-    };
-    SetNext(next, variable_pointers);
-
-    VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES,
-        .pNext = nullptr,
-        .shaderDemoteToHelperInvocation = true,
-    };
-    SetNext(next, demote);
-
-    VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{
-        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES,
-        .pNext = nullptr,
-        .shaderDrawParameters = true,
-    };
-    SetNext(next, draw_parameters);
-
-    VkPhysicalDeviceShaderFloat16Int8Features float16_int8;
-    if (is_int8_supported || is_float16_supported) {
-        float16_int8 = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES,
-            .pNext = nullptr,
-            .shaderFloat16 = is_float16_supported,
-            .shaderInt8 = is_int8_supported,
-        };
-        SetNext(next, float16_int8);
-    }
-    if (!is_float16_supported) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
-    }
-    if (!is_int8_supported) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively");
-    }
-
-    if (!nv_viewport_swizzle) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
-    }
-
-    if (!nv_viewport_array2) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks");
-    }
-
-    if (!nv_geometry_shader_passthrough) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
-    }
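Editor's note: the SetNext helper used throughout the removed block threads each feature struct onto the pNext chain rooted at features2. Its definition is outside this hunk, so the following is a minimal sketch inferred from the call sites (`void** next = &features2.pNext; SetNext(next, ...);`):

// Sketch: append a Vulkan feature struct to a pNext chain.
// Assumes T has a void* pNext member, as all Vk*Features structs do.
template <typename T>
void SetNext(void**& next, T& data) {
    *next = &data;      // link the new struct into the chain
    next = &data.pNext; // advance the chain tail for the next call
}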
 
-    VkPhysicalDeviceUniformBufferStandardLayoutFeatures std430_layout;
-    if (khr_uniform_buffer_standard_layout) {
-        std430_layout = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES,
-            .pNext = nullptr,
-            .uniformBufferStandardLayout = true,
-        };
-        SetNext(next, std430_layout);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
-    }
+    const VkDriverId driver_id = properties.driver.driverID;
+    const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
+    const bool is_amd_driver =
+        driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
+    const bool is_amd = is_amd_driver || is_radv;
+    const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
+    const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
+    const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
+    const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
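Editor's note: properties.driver here is a VkPhysicalDeviceDriverProperties filled earlier by GetSuitability. For reference, a standalone query for the same data looks like this (a sketch against the core Vulkan 1.2 API; variable names are illustrative):

// Sketch: fetch VkPhysicalDeviceDriverProperties via the properties2 chain.
VkPhysicalDeviceDriverProperties driver{};
driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;

VkPhysicalDeviceProperties2 props2{};
props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
props2.pNext = &driver;

vkGetPhysicalDeviceProperties2(physical_device, &props2);
// driver.driverID now holds e.g. VK_DRIVER_ID_MESA_RADV.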
-
-    VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
-    if (ext_index_type_uint8) {
-        index_type_uint8 = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
-            .pNext = nullptr,
-            .indexTypeUint8 = true,
-        };
-        SetNext(next, index_type_uint8);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
-    }
-
-    VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart;
-    if (is_topology_list_restart_supported || is_patch_list_restart_supported) {
-        primitive_topology_list_restart = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT,
-            .pNext = nullptr,
-            .primitiveTopologyListRestart = is_topology_list_restart_supported,
-            .primitiveTopologyPatchListRestart = is_patch_list_restart_supported,
-        };
-        SetNext(next, primitive_topology_list_restart);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart");
-    }
-
-    VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
-    if (ext_transform_feedback) {
-        transform_feedback = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
-            .pNext = nullptr,
-            .transformFeedback = true,
-            .geometryStreams = true,
-        };
-        SetNext(next, transform_feedback);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
-    }
-
-    VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
-    if (ext_custom_border_color) {
-        custom_border = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
-            .pNext = nullptr,
-            .customBorderColors = VK_TRUE,
-            .customBorderColorWithoutFormat = VK_TRUE,
-        };
-        SetNext(next, custom_border);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
-    }
-
-    VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
-    if (ext_extended_dynamic_state) {
-        dynamic_state = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
-            .pNext = nullptr,
-            .extendedDynamicState = VK_TRUE,
-        };
-        SetNext(next, dynamic_state);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
-    }
-
-    VkPhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state_2;
-    if (ext_extended_dynamic_state_2) {
-        dynamic_state_2 = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT,
-            .pNext = nullptr,
-            .extendedDynamicState2 = VK_TRUE,
-            .extendedDynamicState2LogicOp = ext_extended_dynamic_state_2_extra ? VK_TRUE : VK_FALSE,
-            .extendedDynamicState2PatchControlPoints = VK_FALSE,
-        };
-        SetNext(next, dynamic_state_2);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 2");
-    }
-
-    VkPhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3;
-    if (ext_extended_dynamic_state_3) {
-        dynamic_state_3 = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT,
-            .pNext = nullptr,
-            .extendedDynamicState3TessellationDomainOrigin = VK_FALSE,
-            .extendedDynamicState3DepthClampEnable =
-                ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE,
-            .extendedDynamicState3PolygonMode = VK_FALSE,
-            .extendedDynamicState3RasterizationSamples = VK_FALSE,
-            .extendedDynamicState3SampleMask = VK_FALSE,
-            .extendedDynamicState3AlphaToCoverageEnable = VK_FALSE,
-            .extendedDynamicState3AlphaToOneEnable = VK_FALSE,
-            .extendedDynamicState3LogicOpEnable =
-                ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE,
-            .extendedDynamicState3ColorBlendEnable =
-                ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE,
-            .extendedDynamicState3ColorBlendEquation =
-                ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE,
-            .extendedDynamicState3ColorWriteMask =
-                ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE,
-            .extendedDynamicState3RasterizationStream = VK_FALSE,
-            .extendedDynamicState3ConservativeRasterizationMode = VK_FALSE,
-            .extendedDynamicState3ExtraPrimitiveOverestimationSize = VK_FALSE,
-            .extendedDynamicState3DepthClipEnable = VK_FALSE,
-            .extendedDynamicState3SampleLocationsEnable = VK_FALSE,
-            .extendedDynamicState3ColorBlendAdvanced = VK_FALSE,
-            .extendedDynamicState3ProvokingVertexMode = VK_FALSE,
-            .extendedDynamicState3LineRasterizationMode = VK_FALSE,
-            .extendedDynamicState3LineStippleEnable = VK_FALSE,
-            .extendedDynamicState3DepthClipNegativeOneToOne = VK_FALSE,
-            .extendedDynamicState3ViewportWScalingEnable = VK_FALSE,
-            .extendedDynamicState3ViewportSwizzle = VK_FALSE,
-            .extendedDynamicState3CoverageToColorEnable = VK_FALSE,
-            .extendedDynamicState3CoverageToColorLocation = VK_FALSE,
-            .extendedDynamicState3CoverageModulationMode = VK_FALSE,
-            .extendedDynamicState3CoverageModulationTableEnable = VK_FALSE,
-            .extendedDynamicState3CoverageModulationTable = VK_FALSE,
-            .extendedDynamicState3CoverageReductionMode = VK_FALSE,
-            .extendedDynamicState3RepresentativeFragmentTestEnable = VK_FALSE,
-            .extendedDynamicState3ShadingRateImageEnable = VK_FALSE,
-        };
-        SetNext(next, dynamic_state_3);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 3");
-    }
-
-    VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
-    if (ext_line_rasterization) {
-        line_raster = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT,
-            .pNext = nullptr,
-            .rectangularLines = VK_TRUE,
-            .bresenhamLines = VK_FALSE,
-            .smoothLines = VK_TRUE,
-            .stippledRectangularLines = VK_FALSE,
-            .stippledBresenhamLines = VK_FALSE,
-            .stippledSmoothLines = VK_FALSE,
-        };
-        SetNext(next, line_raster);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines");
-    }
-
-    if (!ext_conservative_rasterization) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization");
-    }
-
-    VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
-    if (ext_provoking_vertex) {
-        provoking_vertex = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
-            .pNext = nullptr,
-            .provokingVertexLast = VK_TRUE,
-            .transformFeedbackPreservesProvokingVertex = VK_TRUE,
-        };
-        SetNext(next, provoking_vertex);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last");
-    }
-
-    VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic;
-    if (ext_vertex_input_dynamic_state) {
-        vertex_input_dynamic = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT,
-            .pNext = nullptr,
-            .vertexInputDynamicState = VK_TRUE,
-        };
-        SetNext(next, vertex_input_dynamic);
-    } else {
-        LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
-    }
-
-    VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
-    if (ext_shader_atomic_int64) {
-        atomic_int64 = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES,
-            .pNext = nullptr,
-            .shaderBufferInt64Atomics = VK_TRUE,
-            .shaderSharedInt64Atomics = VK_TRUE,
-        };
-        SetNext(next, atomic_int64);
-    }
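Editor's note: every block deleted above follows the same pattern: fill a Vk*Features struct, SetNext it onto the chain, and let vkCreateDevice consume the whole chain through VkDeviceCreateInfo::pNext. A condensed sketch of that pattern (one struct shown for illustration):

// Sketch: the chain rooted at features2 becomes the pNext of VkDeviceCreateInfo.
VkPhysicalDeviceFeatures2 features2{ /* ... core features ... */ };
void** next = &features2.pNext;

VkPhysicalDeviceIndexTypeUint8FeaturesEXT uint8_index{
    .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
    .pNext = nullptr,
    .indexTypeUint8 = VK_TRUE,
};
SetNext(next, uint8_index);

VkDeviceCreateInfo ci{};
ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
ci.pNext = &features2; // when chaining features2, pEnabledFeatures must stay nullptr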
 
-    VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
-    if (khr_workgroup_memory_explicit_layout && is_shader_int16_supported) {
-        workgroup_layout = {
-            .sType =
-                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
-            .pNext = nullptr,
-            .workgroupMemoryExplicitLayout = VK_TRUE,
-            .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
-            .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
-            .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
-        };
-        SetNext(next, workgroup_layout);
-    } else if (khr_workgroup_memory_explicit_layout) {
-        // TODO(lat9nq): Find a proper fix for this
-        LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_workgroup_memory_explicit_layout due to a "
-                                   "yuzu bug when host driver does not support 16-bit integers");
-        khr_workgroup_memory_explicit_layout = false;
+    if (is_mvk && !is_suitable) {
+        LOG_WARNING(Render_Vulkan, "Unsuitable driver is MoltenVK, continuing anyway");
+    } else if (!is_suitable) {
+        throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
     }
 
-    VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
-    if (khr_pipeline_executable_properties) {
-        LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times");
-        executable_properties = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR,
-            .pNext = nullptr,
-            .pipelineExecutableInfo = VK_TRUE,
-        };
-        SetNext(next, executable_properties);
-    }
-
-    if (!ext_depth_range_unrestricted) {
-        LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
-    }
+    SetupFamilies(surface);
+    const auto queue_cis = GetDeviceQueueCreateInfos();
 
-    VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features;
-    if (ext_depth_clip_control) {
-        depth_clip_control_features = {
-            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT,
-            .pNext = nullptr,
-            .depthClipControl = VK_TRUE,
-        };
-        SetNext(next, depth_clip_control_features);
-    }
+    // GetSuitability has already configured the linked list of features for us.
+    // Reuse it here.
+    const void* first_next = &features2;
 
-    VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
-    if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) {
+    VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv{};
+    if (Settings::values.enable_nsight_aftermath && extensions.device_diagnostics_config) {
         nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
 
         diagnostics_nv = {
@@ -744,33 +337,39 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         };
         first_next = &diagnostics_nv;
     }
-    logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
 
-    is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
-    is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
-    is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
-                 properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
+    is_blit_depth_stencil_supported = TestDepthStencilBlits();
+    is_optimal_astc_supported = ComputeIsOptimalAstcSupported();
+    is_warp_potentially_bigger = !extensions.subgroup_size_control ||
+                                 properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize;
+
+    is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+    is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
+    is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
+                 properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
+
+    supports_d24_depth =
+        IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
+                          VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
 
     CollectPhysicalMemoryInfo();
-    CollectTelemetryParameters();
     CollectToolingInfo();
 
-    if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
-        const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
-
+    if (is_nvidia) {
+        const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
         const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
         switch (arch) {
         case NvidiaArchitecture::AmpereOrNewer:
-            LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
-            is_float16_supported = false;
+            LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
+            features.shader_float16_int8.shaderFloat16 = false;
             break;
         case NvidiaArchitecture::Turing:
             break;
         case NvidiaArchitecture::VoltaOrOlder:
             if (nv_major_version < 527) {
-                LOG_WARNING(Render_Vulkan,
-                            "Blacklisting Volta and older from VK_KHR_push_descriptor");
-                khr_push_descriptor = false;
+                LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
+                extensions.push_descriptor = false;
+                loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
             }
             break;
         }
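Editor's note: the nv_major_version decode above relies on NVIDIA's conventional packing of driverVersion, which is a vendor convention rather than a Vulkan guarantee. A sketch of the full decode under that assumption:

// Sketch: NVIDIA packs driverVersion as major(10) | minor(8) | secondary(8) | tertiary(6).
const u32 version = properties.properties.driverVersion;
const u32 major = (version >> 22) & 0x3ff; // e.g. 527 in the Volta check above
const u32 minor = (version >> 14) & 0xff;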
@@ -779,75 +378,75 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
             cant_blit_msaa = true;
         }
     }
-    const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
-    if (ext_extended_dynamic_state && is_radv) {
+    if (extensions.extended_dynamic_state && is_radv) {
         // Mask driver version variant
-        const u32 version = (properties.driverVersion << 3) >> 3;
+        const u32 version = (properties.properties.driverVersion << 3) >> 3;
         if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
             LOG_WARNING(Render_Vulkan,
                         "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
-            ext_extended_dynamic_state = false;
+            extensions.extended_dynamic_state = false;
+            loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
         }
     }
-    if (ext_vertex_input_dynamic_state && is_radv) {
+    if (extensions.extended_dynamic_state2 && is_radv) {
+        const u32 version = (properties.properties.driverVersion << 3) >> 3;
+        if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
+            LOG_WARNING(
+                Render_Vulkan,
+                "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2");
+            features.extended_dynamic_state2.extendedDynamicState2 = false;
+            features.extended_dynamic_state2.extendedDynamicState2LogicOp = false;
+            features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints = false;
+            extensions.extended_dynamic_state2 = false;
+            loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
+        }
+    }
+    if (extensions.vertex_input_dynamic_state && is_radv) {
         // TODO(ameerj): Blacklist only offending driver versions
         // TODO(ameerj): Confirm if RDNA1 is affected
         const bool is_rdna2 =
-            IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
+            supported_extensions.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
         if (is_rdna2) {
             LOG_WARNING(Render_Vulkan,
                         "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware");
-            ext_vertex_input_dynamic_state = false;
+            extensions.vertex_input_dynamic_state = false;
+            loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
         }
     }
-    if (ext_extended_dynamic_state_2 && is_radv) {
-        const u32 version = (properties.driverVersion << 3) >> 3;
-        if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
-            LOG_WARNING(
-                Render_Vulkan,
-                "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2");
-            ext_extended_dynamic_state_2 = false;
-            ext_extended_dynamic_state_2_extra = false;
-        }
-    }
-    sets_per_pool = 64;
 
-    const bool is_amd =
-        driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
-    if (is_amd) {
+    sets_per_pool = 64;
+    if (is_amd_driver) {
         // AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
         sets_per_pool = 96;
         // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken.
-        if (!is_float16_supported) {
-            LOG_WARNING(
-                Render_Vulkan,
-                "AMD GCN4 and earlier do not properly support VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT");
+        if (!features.shader_float16_int8.shaderFloat16) {
+            LOG_WARNING(Render_Vulkan,
+                        "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT");
             has_broken_cube_compatibility = true;
         }
     }
-    const bool is_amd_or_radv = is_amd || is_radv;
-    if (ext_sampler_filter_minmax && is_amd_or_radv) {
+    if (extensions.sampler_filter_minmax && is_amd) {
         // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
-        if (!is_float16_supported) {
+        if (!features.shader_float16_int8.shaderFloat16) {
             LOG_WARNING(Render_Vulkan,
-                        "Blacklisting AMD GCN4 and earlier for VK_EXT_sampler_filter_minmax");
-            ext_sampler_filter_minmax = false;
+                        "AMD GCN4 and earlier have broken VK_EXT_sampler_filter_minmax");
+            extensions.sampler_filter_minmax = false;
+            loaded_extensions.erase(VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME);
         }
     }
 
-    const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
-    const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
-    if (ext_vertex_input_dynamic_state && is_intel_windows) {
-        const u32 version = (properties.driverVersion << 3) >> 3;
+    if (extensions.vertex_input_dynamic_state && is_intel_windows) {
+        const u32 version = (properties.properties.driverVersion << 3) >> 3;
         if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
-            LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
-            ext_vertex_input_dynamic_state = false;
+            LOG_WARNING(Render_Vulkan, "Intel has broken VK_EXT_vertex_input_dynamic_state");
+            extensions.vertex_input_dynamic_state = false;
+            loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
         }
     }
-    if (is_float16_supported && is_intel_windows) {
+    if (features.shader_float16_int8.shaderFloat16 && is_intel_windows) {
         // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
-        LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math");
-        is_float16_supported = false;
+        LOG_WARNING(Render_Vulkan, "Intel has broken float16 math");
+        features.shader_float16_int8.shaderFloat16 = false;
     }
     if (is_intel_windows) {
         LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
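Editor's note: the recurring `(driverVersion << 3) >> 3` in the RADV and Intel checks strips the top three bits, which Mesa uses as a version variant (mirroring VK_API_VERSION_VARIANT), before comparing against an encoded release. A sketch:

// Sketch: drop the 3-bit variant field so only major.minor.patch remains.
const u32 raw = properties.properties.driverVersion;
const u32 version = (raw << 3) >> 3;
const bool broken_eds = version < VK_MAKE_API_VERSION(0, 21, 2, 0); // RADV < 21.2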
@@ -857,10 +456,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format");
         must_emulate_bgr565 = true;
     }
+    if (is_mvk) {
+        LOG_WARNING(Render_Vulkan,
+                    "MVK driver breaks when using more than 16 vertex attributes/bindings");
+        properties.properties.limits.maxVertexInputAttributes =
+            std::min(properties.properties.limits.maxVertexInputAttributes, 16U);
+        properties.properties.limits.maxVertexInputBindings =
+            std::min(properties.properties.limits.maxVertexInputBindings, 16U);
+    }
 
-    supports_d24_depth =
-        IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
-                          VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
+    logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions),
+                                 first_next, dld);
 
     graphics_queue = logical.GetQueue(graphics_family);
     present_queue = logical.GetQueue(present_family);
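Editor's note: ExtensionListForVulkan converts the set of loaded extension names into the contiguous const char* array Vulkan expects. Its definition lies outside this hunk, so the following is only a plausible sketch (assumes <set>, <string>, <vector>):

// Sketch: the set entries outlive device creation, so handing out
// .c_str() pointers is safe for the duration of vkCreateDevice.
std::vector<const char*> ExtensionListForVulkan(const std::set<std::string, std::less<>>& ext) {
    std::vector<const char*> output;
    output.reserve(ext.size());
    for (const auto& name : ext) {
        output.push_back(name.c_str());
    }
    return output;
}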
@@ -915,7 +521,7 @@ void Device::SaveShader(std::span<const u32> spirv) const {
     }
 }
 
-bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
+bool Device::ComputeIsOptimalAstcSupported() const {
     // Disable for now to avoid converting ASTC twice.
     static constexpr std::array astc_formats = {
         VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
@@ -933,7 +539,7 @@ bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) co
         VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
         VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
     };
-    if (!features.textureCompressionASTC_LDR) {
+    if (!features.features.textureCompressionASTC_LDR) {
         return false;
     }
     const auto format_feature_usage{
@@ -971,7 +577,7 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
 }
 
 std::string Device::GetDriverName() const {
-    switch (driver_id) {
+    switch (properties.driver.driverID) {
     case VK_DRIVER_ID_AMD_PROPRIETARY:
         return "AMD";
     case VK_DRIVER_ID_AMD_OPEN_SOURCE:
@@ -987,507 +593,336 @@ std::string Device::GetDriverName() const {
     case VK_DRIVER_ID_MESA_LLVMPIPE:
         return "LAVAPIPE";
     default:
-        return vendor_name;
+        return properties.driver.driverName;
     }
 }
 
-static std::vector<const char*> ExtensionsRequiredForInstanceVersion(u32 available_version) {
-    std::vector<const char*> extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()};
+bool Device::ShouldBoostClocks() const {
+    const auto driver_id = properties.driver.driverID;
+    const auto vendor_id = properties.properties.vendorID;
+    const auto device_id = properties.properties.deviceID;
 
-    if (available_version < VK_API_VERSION_1_2) {
-        extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(),
-                          REQUIRED_EXTENSIONS_BEFORE_1_2.end());
-    }
+    const bool validated_driver =
+        driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+        driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
+        driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
+        driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
 
-    if (available_version < VK_API_VERSION_1_3) {
-        extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_3.begin(),
-                          REQUIRED_EXTENSIONS_BEFORE_1_3.end());
-    }
+    const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
 
-    return extensions;
+    return validated_driver && !is_steam_deck;
 }
 
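Editor's note: ShouldBoostClocks gates the renderer's clock-boost request (see vulkan_turbo_mode in this change set) to drivers that are known to handle it, and excludes the Steam Deck (AMD vendor 0x1002, device 0x163F). A table-driven sketch of the same exclusion check, with the table contents assumed for illustration (requires <array> and <algorithm>):

// Sketch: exclude specific PCI vendor/device pairs from clock boosting.
struct PciId {
    u32 vendor;
    u32 device;
};
constexpr std::array kBoostExcluded{
    PciId{0x1002, 0x163F}, // AMD Van Gogh APU (Steam Deck), per the check above
};
bool IsBoostExcluded(u32 vendor_id, u32 device_id) {
    return std::ranges::any_of(kBoostExcluded, [&](const PciId& id) {
        return id.vendor == vendor_id && id.device == device_id;
    });
}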
-void Device::CheckSuitability(bool requires_swapchain) const {
-    std::vector<const char*> required_extensions =
-        ExtensionsRequiredForInstanceVersion(instance_version);
-    std::vector<const char*> available_extensions;
-
-    if (requires_swapchain) {
-        required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
-    }
-
-    auto extension_properties = physical.EnumerateDeviceExtensionProperties();
-
-    for (const VkExtensionProperties& property : extension_properties) {
-        available_extensions.push_back(property.extensionName);
-    }
-
-    bool has_all_required_extensions = true;
-    for (const char* requirement_name : required_extensions) {
-        const bool found =
-            std::ranges::any_of(available_extensions, [&](const char* extension_name) {
-                return std::strcmp(requirement_name, extension_name) == 0;
-            });
-
-        if (!found) {
-            LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name);
-            has_all_required_extensions = false;
-        }
-    }
-
-    if (!has_all_required_extensions) {
-        throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
-    }
+bool Device::GetSuitability(bool requires_swapchain) {
+    // Assume we will be suitable.
+    bool suitable = true;
+
+    // Configure properties.
+    properties.properties = physical.GetProperties();
+
+    // Set instance version.
+    instance_version = properties.properties.apiVersion;
+
+    // Minimum of API version 1.1 is required. (This is well-supported.)
+    ASSERT(instance_version >= VK_API_VERSION_1_1);
+
+    // Get available extensions.
+    auto extension_properties = physical.EnumerateDeviceExtensionProperties();
+
+    // Get the set of supported extensions.
+    supported_extensions.clear();
+    for (const VkExtensionProperties& property : extension_properties) {
+        supported_extensions.insert(property.extensionName);
+    }
+
+    // Generate list of extensions to load.
+    loaded_extensions.clear();
+
+#define EXTENSION(prefix, macro_name, var_name) \
+    if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \
+        loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
+        extensions.var_name = true; \
+    }
+#define FEATURE_EXTENSION(prefix, struct_name, macro_name, var_name) \
+    if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \
+        loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
+        extensions.var_name = true; \
+    }
 
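Editor's note: EXTENSION and FEATURE_EXTENSION are classic X-macros; the FOR_EACH_VK_* lists (defined in the header, not shown in this hunk) invoke them once per entry. A toy expansion under an assumed two-entry list:

// Sketch: an assumed list shape in vulkan_device.h.
#define FOR_EACH_VK_EXTENSION(EXTENSION) \
    EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \
    EXTENSION(EXT, TOOLING_INFO, tooling_info)

// FOR_EACH_VK_EXTENSION(EXTENSION) then expands to one guarded insert per entry:
if (supported_extensions.contains(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME)) {
    loaded_extensions.insert(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
    extensions.memory_budget = true;
}
// ...and likewise for VK_EXT_TOOLING_INFO_EXTENSION_NAME.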
-    struct LimitTuple {
-        u32 minimum;
-        u32 value;
-        const char* name;
-    };
-    const VkPhysicalDeviceLimits& limits{properties.limits};
-    const std::array limits_report{
-        LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
-        LimitTuple{16, limits.maxViewports, "maxViewports"},
-        LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
-        LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
-    };
-    for (const auto& tuple : limits_report) {
-        if (tuple.value < tuple.minimum) {
-            LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name,
-                      tuple.minimum, tuple.value);
-            throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
-        }
-    }
-    VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{};
-    demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES;
-    demote.pNext = nullptr;
-
-    VkPhysicalDeviceVariablePointerFeatures variable_pointers{};
-    variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES;
-    variable_pointers.pNext = &demote;
-
-    VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
-    robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
-    robustness2.pNext = &variable_pointers;
-
-    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{};
-    timeline_semaphore.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
-    timeline_semaphore.pNext = &robustness2;
-
-    VkPhysicalDevice16BitStorageFeatures bit16_storage{};
-    bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES;
-    bit16_storage.pNext = &timeline_semaphore;
-
-    VkPhysicalDevice8BitStorageFeatures bit8_storage{};
-    bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
-    bit8_storage.pNext = &bit16_storage;
+    if (instance_version < VK_API_VERSION_1_2) {
+        FOR_EACH_VK_FEATURE_1_2(FEATURE_EXTENSION);
+    }
+    if (instance_version < VK_API_VERSION_1_3) {
+        FOR_EACH_VK_FEATURE_1_3(FEATURE_EXTENSION);
+    }
+
+    FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION);
+    FOR_EACH_VK_EXTENSION(EXTENSION);
+#ifdef _WIN32
+    FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
+#endif
+
+#undef FEATURE_EXTENSION
+#undef EXTENSION
+
+    // Some extensions are mandatory. Check those.
+#define CHECK_EXTENSION(extension_name) \
+    if (!loaded_extensions.contains(extension_name)) { \
+        LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \
+        suitable = false; \
+    }
+
+#define LOG_EXTENSION(extension_name) \
+    if (!loaded_extensions.contains(extension_name)) { \
+        LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \
+    }
+
+    FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION);
+    FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION);
+#ifdef _WIN32
+    FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION);
+#else
+    FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION);
+#endif
 
-    VkPhysicalDeviceHostQueryResetFeatures host_query_reset{};
-    host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES;
-    host_query_reset.pNext = &bit8_storage;
-
-    VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{};
-    draw_parameters.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES;
-    draw_parameters.pNext = &host_query_reset;
-
-    VkPhysicalDeviceFeatures2 features2{};
-    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
-    features2.pNext = &draw_parameters;
-
-    physical.GetFeatures2(features2);
-
-    const VkPhysicalDeviceFeatures& features{features2.features};
-    std::array feature_report{
-        std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
-        std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
-        std::make_pair(features.imageCubeArray, "imageCubeArray"),
-        std::make_pair(features.independentBlend, "independentBlend"),
-        std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"),
-        std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"),
-        std::make_pair(features.depthClamp, "depthClamp"),
-        std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
-        std::make_pair(features.largePoints, "largePoints"),
-        std::make_pair(features.multiViewport, "multiViewport"),
-        std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
-        std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
-        std::make_pair(features.wideLines, "wideLines"),
-        std::make_pair(features.geometryShader, "geometryShader"),
-        std::make_pair(features.tessellationShader, "tessellationShader"),
-        std::make_pair(features.sampleRateShading, "sampleRateShading"),
-        std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
-        std::make_pair(features.logicOp, "logicOp"),
-        std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
-        std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
-        std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
-        std::make_pair(features.shaderStorageImageWriteWithoutFormat,
-                       "shaderStorageImageWriteWithoutFormat"),
-        std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
-        std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
-        std::make_pair(variable_pointers.variablePointers, "variablePointers"),
-        std::make_pair(variable_pointers.variablePointersStorageBuffer,
-                       "variablePointersStorageBuffer"),
-        std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
-        std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
-        std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
-        std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
-        std::make_pair(timeline_semaphore.timelineSemaphore, "timelineSemaphore"),
-        std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"),
-        std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess,
-                       "uniformAndStorageBuffer16BitAccess"),
-        std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"),
-        std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess,
-                       "uniformAndStorageBuffer8BitAccess"),
-        std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"),
-        std::make_pair(draw_parameters.shaderDrawParameters, "shaderDrawParameters"),
-    };
+    if (requires_swapchain) {
+        CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+    }
+
+#undef LOG_EXTENSION
+#undef CHECK_EXTENSION
+
+    // Generate the linked list of features to test.
+    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+
+    // Set next pointer.
+    void** next = &features2.pNext;
+
+    // Test all features we know about. If the feature is not available in core at our
+    // current API version, and was not enabled by an extension, skip testing the feature.
+    // We set the structure sType explicitly here as it is zeroed by the constructor.
+#define FEATURE(prefix, struct_name, macro_name, var_name) \
+    features.var_name.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES; \
+    SetNext(next, features.var_name);
 
-    bool has_all_required_features = true;
-    for (const auto& [is_supported, name] : feature_report) {
-        if (!is_supported) {
-            LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
-            has_all_required_features = false;
-        }
-    }
-
-    if (!has_all_required_features) {
-        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
-    }
-}
-
-std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
-    std::vector<const char*> extensions = ExtensionsRequiredForInstanceVersion(instance_version);
-    if (requires_surface) {
-        extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
-    }
+#define EXT_FEATURE(prefix, struct_name, macro_name, var_name) \
+    if (extensions.var_name) { \
+        features.var_name.sType = \
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES_##prefix; \
+        SetNext(next, features.var_name); \
+    }
+
+    FOR_EACH_VK_FEATURE_1_1(FEATURE);
+    FOR_EACH_VK_FEATURE_EXT(EXT_FEATURE);
+    if (instance_version >= VK_API_VERSION_1_2) {
+        FOR_EACH_VK_FEATURE_1_2(FEATURE);
+    } else {
+        FOR_EACH_VK_FEATURE_1_2(EXT_FEATURE);
+    }
+    if (instance_version >= VK_API_VERSION_1_3) {
+        FOR_EACH_VK_FEATURE_1_3(FEATURE);
+    } else {
+        FOR_EACH_VK_FEATURE_1_3(EXT_FEATURE);
+    }
 
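Editor's note: for a concrete entry -- assuming the 1.2 list contains (KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore) -- FEATURE and EXT_FEATURE would expand as follows:

// FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore):
features.timeline_semaphore.sType =
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
SetNext(next, features.timeline_semaphore);

// EXT_FEATURE(KHR, ...) additionally guards on the extension being loaded
// and uses the extension-suffixed sType:
if (extensions.timeline_semaphore) {
    features.timeline_semaphore.sType =
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
    SetNext(next, features.timeline_semaphore);
}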
-    bool has_khr_shader_float16_int8{};
-    bool has_khr_workgroup_memory_explicit_layout{};
-    bool has_khr_pipeline_executable_properties{};
-    bool has_khr_image_format_list{};
-    bool has_khr_swapchain_mutable_format{};
-    bool has_ext_subgroup_size_control{};
-    bool has_ext_transform_feedback{};
-    bool has_ext_custom_border_color{};
-    bool has_ext_extended_dynamic_state{};
-    bool has_ext_extended_dynamic_state_2{};
-    bool has_ext_extended_dynamic_state_3{};
-    bool has_ext_shader_atomic_int64{};
-    bool has_ext_provoking_vertex{};
-    bool has_ext_vertex_input_dynamic_state{};
-    bool has_ext_line_rasterization{};
-    bool has_ext_primitive_topology_list_restart{};
-    bool has_ext_depth_clip_control{};
-    for (const std::string& extension : supported_extensions) {
-        const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
-                              bool push) {
-            if (extension != name) {
-                return;
-            }
-            if (push) {
-                extensions.push_back(name);
-            }
-            if (status) {
-                status->get() = true;
-            }
-        };
-        test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
-        test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true);
-        test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME,
-             true);
-        test(khr_uniform_buffer_standard_layout,
-             VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
-        test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
-        test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
-        test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
-        test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true);
-        test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
-        test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
-        test(has_ext_primitive_topology_list_restart,
-             VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true);
-        test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
-        test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
-             true);
-        test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
-        test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
-        test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
-             true);
-        test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false);
-        test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
-        test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
-        test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
-        test(has_ext_extended_dynamic_state_2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME,
-             false);
-        test(has_ext_extended_dynamic_state_3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME,
-             false);
-        test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, true);
-        test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
-        test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME,
-             false);
-        test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
-        test(has_khr_workgroup_memory_explicit_layout,
-             VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
-        test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
-        test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
-             false);
-        test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
-        test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true);
-        if (Settings::values.enable_nsight_aftermath) {
-            test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
-                 true);
-        }
-        if (Settings::values.renderer_shader_feedback) {
-            test(has_khr_pipeline_executable_properties,
-                 VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false);
-        }
-    }
-    VkPhysicalDeviceFeatures2 features{};
-    features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
-
-    VkPhysicalDeviceProperties2 physical_properties{};
-    physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
-
-    if (has_khr_shader_float16_int8) {
-        VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features;
-        float16_int8_features.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
-        float16_int8_features.pNext = nullptr;
-        features.pNext = &float16_int8_features;
-
-        physical.GetFeatures2(features);
-        is_float16_supported = float16_int8_features.shaderFloat16;
-        is_int8_supported = float16_int8_features.shaderInt8;
-        extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
-    }
-    if (has_ext_subgroup_size_control) {
-        VkPhysicalDeviceSubgroupSizeControlFeatures subgroup_features;
-        subgroup_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES;
-        subgroup_features.pNext = nullptr;
-        features.pNext = &subgroup_features;
-        physical.GetFeatures2(features);
-
-        VkPhysicalDeviceSubgroupSizeControlProperties subgroup_properties;
-        subgroup_properties.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
-        subgroup_properties.pNext = nullptr;
-        physical_properties.pNext = &subgroup_properties;
-        physical.GetProperties2(physical_properties);
+#undef EXT_FEATURE
+#undef FEATURE
 
-        is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
-
-        if (subgroup_features.subgroupSizeControl &&
-            subgroup_properties.minSubgroupSize <= GuestWarpSize &&
-            subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
-            extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
-            guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
-            ext_subgroup_size_control = true;
-        }
-    } else {
-        is_warp_potentially_bigger = true;
-    }
-    if (has_ext_provoking_vertex) {
-        VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
-        provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
-        provoking_vertex.pNext = nullptr;
-        features.pNext = &provoking_vertex;
-        physical.GetFeatures2(features);
-
-        if (provoking_vertex.provokingVertexLast &&
-            provoking_vertex.transformFeedbackPreservesProvokingVertex) {
-            extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
-            ext_provoking_vertex = true;
-        }
-    }
-    if (has_ext_vertex_input_dynamic_state) {
-        VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input;
-        vertex_input.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
-        vertex_input.pNext = nullptr;
-        features.pNext = &vertex_input;
-        physical.GetFeatures2(features);
-
-        if (vertex_input.vertexInputDynamicState) {
-            extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
-            ext_vertex_input_dynamic_state = true;
-        }
-    }
-    if (has_ext_shader_atomic_int64) {
-        VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
-        atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES;
-        atomic_int64.pNext = nullptr;
-        features.pNext = &atomic_int64;
-        physical.GetFeatures2(features);
-
-        if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
-            extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
-            ext_shader_atomic_int64 = true;
-        }
-    }
-    if (has_ext_transform_feedback) {
-        VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
-        tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
-        tfb_features.pNext = nullptr;
-        features.pNext = &tfb_features;
-        physical.GetFeatures2(features);
-
-        VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
-        tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
-        tfb_properties.pNext = nullptr;
-        physical_properties.pNext = &tfb_properties;
-        physical.GetProperties2(physical_properties);
+    // Perform the feature test.
+    physical.GetFeatures2(features2);
+    features.features = features2.features;
+
+    // Some features are mandatory. Check those.
+#define CHECK_FEATURE(feature, name) \
+    if (!features.feature.name) { \
+        LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \
+        suitable = false; \
+    }
 
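Editor's note: CHECK_FEATURE pairs with FOR_EACH_VK_MANDATORY_FEATURE the same way the extension macros pair with their lists. Assuming the mandatory list contains an entry (features, robustBufferAccess), the expansion is:

// Expansion of CHECK_FEATURE(features, robustBufferAccess):
if (!features.features.robustBufferAccess) {
    LOG_ERROR(Render_Vulkan, "Missing required feature {}", "robustBufferAccess");
    suitable = false;
}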
-        if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
-            tfb_properties.maxTransformFeedbackStreams >= 4 &&
-            tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries &&
-            tfb_properties.transformFeedbackDraw) {
-            extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
-            ext_transform_feedback = true;
-        }
-    }
-    if (has_ext_custom_border_color) {
-        VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
-        border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
-        border_features.pNext = nullptr;
-        features.pNext = &border_features;
-        physical.GetFeatures2(features);
-
-        if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) {
-            extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
-            ext_custom_border_color = true;
-        }
-    }
-    if (has_ext_extended_dynamic_state) {
-        VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state;
-        extended_dynamic_state.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
-        extended_dynamic_state.pNext = nullptr;
-        features.pNext = &extended_dynamic_state;
-        physical.GetFeatures2(features);
-
-        if (extended_dynamic_state.extendedDynamicState) {
-            extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
-            ext_extended_dynamic_state = true;
-        }
-    }
-    if (has_ext_extended_dynamic_state_2) {
-        VkPhysicalDeviceExtendedDynamicState2FeaturesEXT extended_dynamic_state_2;
-        extended_dynamic_state_2.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT;
-        extended_dynamic_state_2.pNext = nullptr;
-        features.pNext = &extended_dynamic_state_2;
-        physical.GetFeatures2(features);
-
-        if (extended_dynamic_state_2.extendedDynamicState2) {
-            extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
-            ext_extended_dynamic_state_2 = true;
-            ext_extended_dynamic_state_2_extra =
-                extended_dynamic_state_2.extendedDynamicState2LogicOp;
-        }
-    }
-    if (has_ext_extended_dynamic_state_3) {
-        VkPhysicalDeviceExtendedDynamicState3FeaturesEXT extended_dynamic_state_3;
-        extended_dynamic_state_3.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT;
-        extended_dynamic_state_3.pNext = nullptr;
-        features.pNext = &extended_dynamic_state_3;
-        physical.GetFeatures2(features);
-
-        ext_extended_dynamic_state_3_blend =
-            extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable &&
-            extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation &&
-            extended_dynamic_state_3.extendedDynamicState3ColorWriteMask;
+#define LOG_FEATURE(feature, name) \
+    if (!features.feature.name) { \
+        LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \
+    }
+
+    FOR_EACH_VK_RECOMMENDED_FEATURE(LOG_FEATURE);
+    FOR_EACH_VK_MANDATORY_FEATURE(CHECK_FEATURE);
+
+#undef LOG_FEATURE
+#undef CHECK_FEATURE
+
+    // Generate linked list of properties.
+    properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+
+    // Set next pointer.
+    next = &properties2.pNext;
 
-        ext_extended_dynamic_state_3_enables =
-            extended_dynamic_state_3.extendedDynamicState3DepthClampEnable &&
-            extended_dynamic_state_3.extendedDynamicState3LogicOpEnable;
-
-        ext_extended_dynamic_state_3 =
-            ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables;
-        if (ext_extended_dynamic_state_3) {
-            extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
-        }
-    }
-    if (has_ext_line_rasterization) {
-        VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
-        line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT;
-        line_raster.pNext = nullptr;
-        features.pNext = &line_raster;
-        physical.GetFeatures2(features);
-        if (line_raster.rectangularLines && line_raster.smoothLines) {
-            extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
-            ext_line_rasterization = true;
-        }
-    }
-    if (has_ext_depth_clip_control) {
-        VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features;
-        depth_clip_control_features.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT;
-        depth_clip_control_features.pNext = nullptr;
-        features.pNext = &depth_clip_control_features;
-        physical.GetFeatures2(features);
-
-        if (depth_clip_control_features.depthClipControl) {
-            extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
-            ext_depth_clip_control = true;
-        }
-    }
+    // Get driver info.
+    properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
+    SetNext(next, properties.driver);
+
+    // Retrieve relevant extension properties.
+    if (extensions.shader_float_controls) {
+        properties.float_controls.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
+        SetNext(next, properties.float_controls);
+    }
+    if (extensions.push_descriptor) {
+        properties.push_descriptor.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
+        SetNext(next, properties.push_descriptor);
+    }
+    if (extensions.subgroup_size_control) {
+        properties.subgroup_size_control.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
+        SetNext(next, properties.subgroup_size_control);
+    }
-    if (has_khr_workgroup_memory_explicit_layout) {
-        VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
-        layout.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
-        layout.pNext = nullptr;
-        features.pNext = &layout;
-        physical.GetFeatures2(features);
-
-        if (layout.workgroupMemoryExplicitLayout &&
-            layout.workgroupMemoryExplicitLayout8BitAccess &&
-            layout.workgroupMemoryExplicitLayout16BitAccess &&
-            layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
-            extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
-            khr_workgroup_memory_explicit_layout = true;
-        }
-    }
-    if (has_khr_pipeline_executable_properties) {
-        VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
-        executable_properties.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR;
-        executable_properties.pNext = nullptr;
-        features.pNext = &executable_properties;
-        physical.GetFeatures2(features);
-
-        if (executable_properties.pipelineExecutableInfo) {
-            extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
-            khr_pipeline_executable_properties = true;
-        }
-    }
+    if (extensions.transform_feedback) {
+        properties.transform_feedback.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
+        SetNext(next, properties.transform_feedback);
+    }
+
+    // Perform the property fetch.
+    physical.GetProperties2(properties2);
+    properties.properties = properties2.properties;
+
+    // Unload extensions if feature support is insufficient.
+    RemoveUnsuitableExtensions();
+
+    // Check limits.
+    struct Limit {
+        u32 minimum;
+        u32 value;
+        const char* name;
+    };
+
+    const VkPhysicalDeviceLimits& limits{properties.properties.limits};
+    const std::array limits_report{
+        Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
+        Limit{16, limits.maxViewports, "maxViewports"},
+        Limit{8, limits.maxColorAttachments, "maxColorAttachments"},
+        Limit{8, limits.maxClipDistances, "maxClipDistances"},
+    };
+
+    for (const auto& [min, value, name] : limits_report) {
+        if (value < min) {
+            LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", name, min, value);
+            suitable = false;
+        }
+    }
-    if (has_ext_primitive_topology_list_restart) {
-        VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{};
-        primitive_topology_list_restart.sType =
-            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT;
-        primitive_topology_list_restart.pNext = nullptr;
-        features.pNext = &primitive_topology_list_restart;
-        physical.GetFeatures2(features);
-
-        is_topology_list_restart_supported =
-            primitive_topology_list_restart.primitiveTopologyListRestart;
-        is_patch_list_restart_supported =
-            primitive_topology_list_restart.primitiveTopologyPatchListRestart;
-    }
-    if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
-        extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
-        extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
-        khr_swapchain_mutable_format = true;
-    }
-    if (khr_push_descriptor) {
-        VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
-        push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
-        push_descriptor.pNext = nullptr;
-
-        physical_properties.pNext = &push_descriptor;
-        physical.GetProperties2(physical_properties);
+
+    // Return whether we were suitable.
+    return suitable;
+}
 
-        max_push_descriptors = push_descriptor.maxPushDescriptors;
-    }
-    return extensions;
+void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) {
+    if (loaded_extensions.contains(extension_name) && !is_suitable) {
+        LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name);
+        loaded_extensions.erase(extension_name);
+    }
+}
+
+void Device::RemoveUnsuitableExtensions() {
+    // VK_EXT_custom_border_color
+    extensions.custom_border_color = features.custom_border_color.customBorderColors &&
+                                     features.custom_border_color.customBorderColorWithoutFormat;
+    RemoveExtensionIfUnsuitable(extensions.custom_border_color,
+                                VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+
+    // VK_EXT_depth_clip_control
+    extensions.depth_clip_control = features.depth_clip_control.depthClipControl;
+    RemoveExtensionIfUnsuitable(extensions.depth_clip_control,
+                                VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
+
+    // VK_EXT_extended_dynamic_state
+    extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState;
+    RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state,
+                                VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+
+    // VK_EXT_extended_dynamic_state2
+    extensions.extended_dynamic_state2 = features.extended_dynamic_state2.extendedDynamicState2;
+    RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state2,
+                                VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
+
+    // VK_EXT_extended_dynamic_state3
+    dynamic_state3_blending =
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable &&
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation &&
+        features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask;
+    dynamic_state3_enables =
+        features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable &&
+        features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
+
+    extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables;
+    dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3;
+    dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3;
+    RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state3,
+                                VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
+
+    // VK_EXT_provoking_vertex
+    extensions.provoking_vertex =
+        features.provoking_vertex.provokingVertexLast &&
+        features.provoking_vertex.transformFeedbackPreservesProvokingVertex;
+    RemoveExtensionIfUnsuitable(extensions.provoking_vertex,
+                                VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
+
+    // VK_KHR_shader_atomic_int64
+    extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
+                                     features.shader_atomic_int64.shaderSharedInt64Atomics;
+    RemoveExtensionIfUnsuitable(extensions.shader_atomic_int64,
+                                VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
+
+    // VK_EXT_shader_demote_to_helper_invocation
+    extensions.shader_demote_to_helper_invocation =
+        features.shader_demote_to_helper_invocation.shaderDemoteToHelperInvocation;
+    RemoveExtensionIfUnsuitable(extensions.shader_demote_to_helper_invocation,
+                                VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
+
+    // VK_EXT_subgroup_size_control
+    extensions.subgroup_size_control =
+        features.subgroup_size_control.subgroupSizeControl &&
+        properties.subgroup_size_control.minSubgroupSize <= GuestWarpSize &&
+        properties.subgroup_size_control.maxSubgroupSize >= GuestWarpSize;
+    RemoveExtensionIfUnsuitable(extensions.subgroup_size_control,
+                                VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
+
+    // VK_EXT_transform_feedback
+    extensions.transform_feedback =
+        features.transform_feedback.transformFeedback &&
892 features.transform_feedback.geometryStreams &&
893 properties.transform_feedback.maxTransformFeedbackStreams >= 4 &&
894 properties.transform_feedback.maxTransformFeedbackBuffers > 0 &&
895 properties.transform_feedback.transformFeedbackQueries &&
896 properties.transform_feedback.transformFeedbackDraw;
897 RemoveExtensionIfUnsuitable(extensions.transform_feedback,
898 VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
899
900 // VK_EXT_vertex_input_dynamic_state
901 extensions.vertex_input_dynamic_state =
902 features.vertex_input_dynamic_state.vertexInputDynamicState;
903 RemoveExtensionIfUnsuitable(extensions.vertex_input_dynamic_state,
904 VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
905
906 // VK_KHR_pipeline_executable_properties
907 if (Settings::values.renderer_shader_feedback.GetValue()) {
908 extensions.pipeline_executable_properties =
909 features.pipeline_executable_properties.pipelineExecutableInfo;
910 RemoveExtensionIfUnsuitable(extensions.pipeline_executable_properties,
911 VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
912 } else {
913 extensions.pipeline_executable_properties = false;
914 loaded_extensions.erase(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
915 }
916
917 // VK_KHR_workgroup_memory_explicit_layout
918 extensions.workgroup_memory_explicit_layout =
919 features.features.shaderInt16 &&
920 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
921 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
922 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
923 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
924 RemoveExtensionIfUnsuitable(extensions.workgroup_memory_explicit_layout,
925 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
1491} 926}
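
The new RemoveUnsuitableExtensions body repeats one shape per extension: compute a combined suitability bool from the fetched feature and property structs, then funnel it through RemoveExtensionIfUnsuitable. A hypothetical further entry would follow the same pattern; VK_EXT_example and its two feature bits below are placeholders, not real Vulkan names:

    // Hypothetical: gate a made-up VK_EXT_example on two of its feature bits.
    extensions.example =
        features.example.exampleFeatureA &&
        features.example.exampleFeatureB;
    RemoveExtensionIfUnsuitable(extensions.example, VK_EXT_EXAMPLE_EXTENSION_NAME);
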
1492 927
1493void Device::SetupFamilies(VkSurfaceKHR surface) { 928void Device::SetupFamilies(VkSurfaceKHR surface) {
@@ -1517,55 +952,12 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
1517 LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); 952 LOG_ERROR(Render_Vulkan, "Device lacks a present queue");
1518 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 953 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
1519 } 954 }
1520 graphics_family = *graphics;
1521 present_family = *present;
1522}
1523
1524void Device::SetupFeatures() {
1525 const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
1526 is_depth_bounds_supported = features.depthBounds;
1527 is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat;
1528 is_shader_float64_supported = features.shaderFloat64;
1529 is_shader_int64_supported = features.shaderInt64;
1530 is_shader_int16_supported = features.shaderInt16;
1531 is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
1532 is_blit_depth_stencil_supported = TestDepthStencilBlits();
1533 is_optimal_astc_supported = IsOptimalAstcSupported(features);
1534
1535 const VkPhysicalDeviceLimits& limits{properties.limits};
1536 max_vertex_input_attributes = limits.maxVertexInputAttributes;
1537 max_vertex_input_bindings = limits.maxVertexInputBindings;
1538}
1539
1540void Device::SetupProperties() {
1541 float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
1542
1543 VkPhysicalDeviceProperties2KHR properties2{};
1544 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1545 properties2.pNext = &float_controls;
1546
1547 physical.GetProperties2(properties2);
1548}
1549
1550void Device::CollectTelemetryParameters() {
1551 VkPhysicalDeviceDriverProperties driver{
1552 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
1553 .pNext = nullptr,
1554 .driverID = {},
1555 .driverName = {},
1556 .driverInfo = {},
1557 .conformanceVersion = {},
1558 };
1559
1560 VkPhysicalDeviceProperties2 device_properties{
1561 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
1562 .pNext = &driver,
1563 .properties = {},
1564 };
1565 physical.GetProperties2(device_properties);
1566
1567 driver_id = driver.driverID;
1568 vendor_name = driver.driverName;
1569}
955 if (graphics) {
956 graphics_family = *graphics;
957 }
958 if (present) {
959 present_family = *present;
960 }
961}
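
For context, SetupFamilies rests on the standard Vulkan queue-family query. A rough sketch in raw Vulkan, outside this codebase's vk:: wrappers, with physical_device and surface assumed in scope:

    #include <optional>
    #include <vector>
    #include <vulkan/vulkan.h>

    void PickFamilies(VkPhysicalDevice physical_device, VkSurfaceKHR surface) {
        uint32_t count = 0;
        vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, nullptr);
        std::vector<VkQueueFamilyProperties> families(count);
        vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, families.data());

        std::optional<uint32_t> graphics;
        std::optional<uint32_t> present;
        for (uint32_t i = 0; i < count; ++i) {
            // Any family with the graphics bit can serve as the graphics queue.
            if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
                graphics = i;
            }
            // Present support is queried per family against the target surface.
            VkBool32 can_present = VK_FALSE;
            vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &can_present);
            if (can_present) {
                present = i;
            }
        }
        // The patch above only assigns the family indices that were found,
        // matching the new guarded assignments in SetupFamilies.
    }
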
1570 962
1571u64 Device::GetDeviceMemoryUsage() const { 963u64 Device::GetDeviceMemoryUsage() const {
@@ -1583,7 +975,8 @@ u64 Device::GetDeviceMemoryUsage() const {
1583void Device::CollectPhysicalMemoryInfo() { 975void Device::CollectPhysicalMemoryInfo() {
1584 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; 976 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
1585 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; 977 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
1586 const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); 978 const auto mem_info =
979 physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr);
1587 const auto& mem_properties = mem_info.memoryProperties; 980 const auto& mem_properties = mem_info.memoryProperties;
1588 const size_t num_properties = mem_properties.memoryHeapCount; 981 const size_t num_properties = mem_properties.memoryHeapCount;
1589 device_access_memory = 0; 982 device_access_memory = 0;
@@ -1599,7 +992,7 @@ void Device::CollectPhysicalMemoryInfo() {
1599 if (is_heap_local) { 992 if (is_heap_local) {
1600 local_memory += mem_properties.memoryHeaps[element].size; 993 local_memory += mem_properties.memoryHeaps[element].size;
1601 } 994 }
1602 if (ext_memory_budget) { 995 if (extensions.memory_budget) {
1603 device_initial_usage += budget.heapUsage[element]; 996 device_initial_usage += budget.heapUsage[element];
1604 device_access_memory += budget.heapBudget[element]; 997 device_access_memory += budget.heapBudget[element];
1605 continue; 998 continue;
@@ -1615,7 +1008,7 @@ void Device::CollectPhysicalMemoryInfo() {
1615} 1008}
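
The memory accounting above hinges on chaining VkPhysicalDeviceMemoryBudgetPropertiesEXT into the properties query only when VK_EXT_memory_budget is available. A sketch with raw Vulkan calls, with physical_device and has_memory_budget assumed in scope:

    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
    budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

    VkPhysicalDeviceMemoryProperties2 mem_info{};
    mem_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
    mem_info.pNext = has_memory_budget ? &budget : nullptr; // chain only if supported
    vkGetPhysicalDeviceMemoryProperties2(physical_device, &mem_info);

    // With the extension, budget.heapBudget[i] is how much of heap i this
    // process may use and budget.heapUsage[i] its current usage, indexed in
    // step with mem_info.memoryProperties.memoryHeaps.
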
1616 1009
1617void Device::CollectToolingInfo() { 1010void Device::CollectToolingInfo() {
1618 if (!ext_tooling_info) { 1011 if (!extensions.tooling_info) {
1619 return; 1012 return;
1620 } 1013 }
1621 auto tools{physical.GetPhysicalDeviceToolProperties()}; 1014 auto tools{physical.GetPhysicalDeviceToolProperties()};
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 920a8f4e3..0662a2d9f 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -3,6 +3,7 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <set>
6#include <span> 7#include <span>
7#include <string> 8#include <string>
8#include <unordered_map> 9#include <unordered_map>
@@ -11,6 +12,156 @@
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
13 14
15// Define all features which may be used by the implementation here.
16// Vulkan version in the macro describes the minimum version required for feature availability.
17// If the Vulkan version is lower than the required version, the named extension is required.
18#define FOR_EACH_VK_FEATURE_1_1(FEATURE) \
19 FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \
20 FEATURE(KHR, 16BitStorage, 16BIT_STORAGE, bit16_storage) \
21 FEATURE(KHR, ShaderAtomicInt64, SHADER_ATOMIC_INT64, shader_atomic_int64) \
22 FEATURE(KHR, ShaderDrawParameters, SHADER_DRAW_PARAMETERS, shader_draw_parameters) \
23 FEATURE(KHR, ShaderFloat16Int8, SHADER_FLOAT16_INT8, shader_float16_int8) \
24 FEATURE(KHR, UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, \
25 uniform_buffer_standard_layout) \
26 FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer)
27
28#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \
29 FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \
30 FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \
31 FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
32
33#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \
34 FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \
35 shader_demote_to_helper_invocation)
36
37// Define all features which may be used by the implementation and require an extension here.
38#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
39 FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \
40 FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \
41 FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
42 FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
43 FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
44 FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
45 FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
46 FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
47 primitive_topology_list_restart) \
48 FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
49 FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
50 FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
51 FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
52 FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
53 pipeline_executable_properties) \
54 FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
55 workgroup_memory_explicit_layout)
56
57// Define miscellaneous extensions which may be used by the implementation here.
58#define FOR_EACH_VK_EXTENSION(EXTENSION) \
59 EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \
60 EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \
61 EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \
62 EXTENSION(EXT, ROBUSTNESS_2, robustness_2) \
63 EXTENSION(EXT, SAMPLER_FILTER_MINMAX, sampler_filter_minmax) \
64 EXTENSION(EXT, SHADER_STENCIL_EXPORT, shader_stencil_export) \
65 EXTENSION(EXT, SHADER_VIEWPORT_INDEX_LAYER, shader_viewport_index_layer) \
66 EXTENSION(EXT, TOOLING_INFO, tooling_info) \
67 EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \
68 EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \
69 EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \
70 EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \
71 EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \
72 EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \
73 EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
74 EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
75 EXTENSION(KHR, SWAPCHAIN, swapchain) \
76 EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
77 EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
78 EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
79 EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
80 EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle)
81
82#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \
83 EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32)
84
85// Define extensions which must be supported.
86#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
87 EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
88 EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
89 EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \
90 EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
91 EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
92
93#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \
94 EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME)
95
96#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \
97 EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME)
98
99// Define extensions where the absence of the extension may result in a degraded experience.
100#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
101 EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
102 EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \
103 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
104 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
105 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
106 EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
107 EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
108 EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \
109 EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \
110 EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME)
111
112// Define features which must be supported.
113#define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \
114 FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
115 FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
116 FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
117 FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
118 FEATURE_NAME(features, depthBiasClamp) \
119 FEATURE_NAME(features, depthClamp) \
120 FEATURE_NAME(features, drawIndirectFirstInstance) \
121 FEATURE_NAME(features, dualSrcBlend) \
122 FEATURE_NAME(features, fillModeNonSolid) \
123 FEATURE_NAME(features, fragmentStoresAndAtomics) \
124 FEATURE_NAME(features, geometryShader) \
125 FEATURE_NAME(features, imageCubeArray) \
126 FEATURE_NAME(features, independentBlend) \
127 FEATURE_NAME(features, largePoints) \
128 FEATURE_NAME(features, logicOp) \
129 FEATURE_NAME(features, multiDrawIndirect) \
130 FEATURE_NAME(features, multiViewport) \
131 FEATURE_NAME(features, occlusionQueryPrecise) \
132 FEATURE_NAME(features, robustBufferAccess) \
133 FEATURE_NAME(features, samplerAnisotropy) \
134 FEATURE_NAME(features, sampleRateShading) \
135 FEATURE_NAME(features, shaderClipDistance) \
136 FEATURE_NAME(features, shaderCullDistance) \
137 FEATURE_NAME(features, shaderImageGatherExtended) \
138 FEATURE_NAME(features, shaderStorageImageWriteWithoutFormat) \
139 FEATURE_NAME(features, tessellationShader) \
140 FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \
141 FEATURE_NAME(features, wideLines) \
142 FEATURE_NAME(host_query_reset, hostQueryReset) \
143 FEATURE_NAME(robustness2, nullDescriptor) \
144 FEATURE_NAME(robustness2, robustBufferAccess2) \
145 FEATURE_NAME(robustness2, robustImageAccess2) \
146 FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \
147 FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \
148 FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
149 FEATURE_NAME(variable_pointer, variablePointers) \
150 FEATURE_NAME(variable_pointer, variablePointersStorageBuffer)
151
152// Define features where the absence of the feature may result in a degraded experience.
153#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \
154 FEATURE_NAME(custom_border_color, customBorderColors) \
155 FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
156 FEATURE_NAME(index_type_uint8, indexTypeUint8) \
157 FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
158 FEATURE_NAME(provoking_vertex, provokingVertexLast) \
159 FEATURE_NAME(shader_float16_int8, shaderFloat16) \
160 FEATURE_NAME(shader_float16_int8, shaderInt8) \
161 FEATURE_NAME(transform_feedback, transformFeedback) \
162 FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \
163 FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState)
164
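
To make the X-macro scheme above concrete: each FEATURE entry is expanded twice further down in this header, once into a bool flag in struct Extensions and once into the matching Vulkan feature struct in struct Features. For example, FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) expands roughly to:

    // In struct Extensions, via
    //   #define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{};
    bool custom_border_color{};

    // In struct Features, via
    //   #define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \
    //       VkPhysicalDevice##struct_name##Features##prefix var_name{};
    VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_color{};
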
14namespace Vulkan { 165namespace Vulkan {
15 166
16class NsightAftermathTracker; 167class NsightAftermathTracker;
@@ -88,67 +239,69 @@ public:
88 239
89 /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. 240 /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
90 u32 ApiVersion() const { 241 u32 ApiVersion() const {
91 return properties.apiVersion; 242 return properties.properties.apiVersion;
92 } 243 }
93 244
94 /// Returns the current driver version provided in Vulkan-formatted version numbers. 245 /// Returns the current driver version provided in Vulkan-formatted version numbers.
95 u32 GetDriverVersion() const { 246 u32 GetDriverVersion() const {
96 return properties.driverVersion; 247 return properties.properties.driverVersion;
97 } 248 }
98 249
99 /// Returns the device name. 250 /// Returns the device name.
100 std::string_view GetModelName() const { 251 std::string_view GetModelName() const {
101 return properties.deviceName; 252 return properties.properties.deviceName;
102 } 253 }
103 254
104 /// Returns the driver ID. 255 /// Returns the driver ID.
105 VkDriverIdKHR GetDriverID() const { 256 VkDriverIdKHR GetDriverID() const {
106 return driver_id; 257 return properties.driver.driverID;
107 } 258 }
108 259
260 bool ShouldBoostClocks() const;
261
109 /// Returns uniform buffer alignment requirement. 262 /// Returns uniform buffer alignment requirement.
110 VkDeviceSize GetUniformBufferAlignment() const { 263 VkDeviceSize GetUniformBufferAlignment() const {
111 return properties.limits.minUniformBufferOffsetAlignment; 264 return properties.properties.limits.minUniformBufferOffsetAlignment;
112 } 265 }
113 266
114 /// Returns storage alignment requirement. 267 /// Returns storage alignment requirement.
115 VkDeviceSize GetStorageBufferAlignment() const { 268 VkDeviceSize GetStorageBufferAlignment() const {
116 return properties.limits.minStorageBufferOffsetAlignment; 269 return properties.properties.limits.minStorageBufferOffsetAlignment;
117 } 270 }
118 271
119 /// Returns the maximum range for storage buffers. 272 /// Returns the maximum range for storage buffers.
120 VkDeviceSize GetMaxStorageBufferRange() const { 273 VkDeviceSize GetMaxStorageBufferRange() const {
121 return properties.limits.maxStorageBufferRange; 274 return properties.properties.limits.maxStorageBufferRange;
122 } 275 }
123 276
124 /// Returns the maximum size for push constants. 277 /// Returns the maximum size for push constants.
125 VkDeviceSize GetMaxPushConstantsSize() const { 278 VkDeviceSize GetMaxPushConstantsSize() const {
126 return properties.limits.maxPushConstantsSize; 279 return properties.properties.limits.maxPushConstantsSize;
127 } 280 }
128 281
129 /// Returns the maximum size for shared memory. 282 /// Returns the maximum size for shared memory.
130 u32 GetMaxComputeSharedMemorySize() const { 283 u32 GetMaxComputeSharedMemorySize() const {
131 return properties.limits.maxComputeSharedMemorySize; 284 return properties.properties.limits.maxComputeSharedMemorySize;
132 } 285 }
133 286
134 /// Returns float control properties of the device. 287 /// Returns float control properties of the device.
135 const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { 288 const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
136 return float_controls; 289 return properties.float_controls;
137 } 290 }
138 291
139 /// Returns true if ASTC is natively supported. 292 /// Returns true if ASTC is natively supported.
140 bool IsOptimalAstcSupported() const { 293 bool IsOptimalAstcSupported() const {
141 return is_optimal_astc_supported; 294 return features.features.textureCompressionASTC_LDR;
142 } 295 }
143 296
144 /// Returns true if the device supports float16 natively. 297 /// Returns true if the device supports float16 natively.
145 bool IsFloat16Supported() const { 298 bool IsFloat16Supported() const {
146 return is_float16_supported; 299 return features.shader_float16_int8.shaderFloat16;
147 } 300 }
148 301
149 /// Returns true if the device supports int8 natively. 302 /// Returns true if the device supports int8 natively.
150 bool IsInt8Supported() const { 303 bool IsInt8Supported() const {
151 return is_int8_supported; 304 return features.shader_float16_int8.shaderInt8;
152 } 305 }
153 306
154 /// Returns true if the device warp size can potentially be bigger than guest's warp size. 307 /// Returns true if the device warp size can potentially be bigger than guest's warp size.
@@ -158,32 +311,32 @@ public:
158 311
159 /// Returns true if the device can be forced to use the guest warp size. 312 /// Returns true if the device can be forced to use the guest warp size.
160 bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { 313 bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
161 return guest_warp_stages & stage; 314 return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
162 } 315 }
163 316
164 /// Returns the maximum number of push descriptors. 317 /// Returns the maximum number of push descriptors.
165 u32 MaxPushDescriptors() const { 318 u32 MaxPushDescriptors() const {
166 return max_push_descriptors; 319 return properties.push_descriptor.maxPushDescriptors;
167 } 320 }
168 321
169 /// Returns true if formatless image load is supported. 322 /// Returns true if formatless image load is supported.
170 bool IsFormatlessImageLoadSupported() const { 323 bool IsFormatlessImageLoadSupported() const {
171 return is_formatless_image_load_supported; 324 return features.features.shaderStorageImageReadWithoutFormat;
172 } 325 }
173 326
174 /// Returns true if shader int64 is supported. 327 /// Returns true if shader int64 is supported.
175 bool IsShaderInt64Supported() const { 328 bool IsShaderInt64Supported() const {
176 return is_shader_int64_supported; 329 return features.features.shaderInt64;
177 } 330 }
178 331
179 /// Returns true if shader int16 is supported. 332 /// Returns true if shader int16 is supported.
180 bool IsShaderInt16Supported() const { 333 bool IsShaderInt16Supported() const {
181 return is_shader_int16_supported; 334 return features.features.shaderInt16;
182 } 335 }
183 336
184 // Returns true if depth bounds is supported. 337 // Returns true if depth bounds is supported.
185 bool IsDepthBoundsSupported() const { 338 bool IsDepthBoundsSupported() const {
186 return is_depth_bounds_supported; 339 return features.features.depthBounds;
187 } 340 }
188 341
189 /// Returns true when blitting from and to depth stencil images is supported. 342 /// Returns true when blitting from and to depth stencil images is supported.
@@ -193,151 +346,151 @@ public:
193 346
194 /// Returns true if the device supports VK_NV_viewport_swizzle. 347 /// Returns true if the device supports VK_NV_viewport_swizzle.
195 bool IsNvViewportSwizzleSupported() const { 348 bool IsNvViewportSwizzleSupported() const {
196 return nv_viewport_swizzle; 349 return extensions.viewport_swizzle;
197 } 350 }
198 351
199 /// Returns true if the device supports VK_NV_viewport_array2. 352 /// Returns true if the device supports VK_NV_viewport_array2.
200 bool IsNvViewportArray2Supported() const { 353 bool IsNvViewportArray2Supported() const {
201 return nv_viewport_array2; 354 return extensions.viewport_array2;
202 } 355 }
203 356
204 /// Returns true if the device supports VK_NV_geometry_shader_passthrough. 357 /// Returns true if the device supports VK_NV_geometry_shader_passthrough.
205 bool IsNvGeometryShaderPassthroughSupported() const { 358 bool IsNvGeometryShaderPassthroughSupported() const {
206 return nv_geometry_shader_passthrough; 359 return extensions.geometry_shader_passthrough;
207 } 360 }
208 361
209 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. 362 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
210 bool IsKhrUniformBufferStandardLayoutSupported() const { 363 bool IsKhrUniformBufferStandardLayoutSupported() const {
211 return khr_uniform_buffer_standard_layout; 364 return extensions.uniform_buffer_standard_layout;
212 } 365 }
213 366
214 /// Returns true if the device supports VK_KHR_push_descriptor. 367 /// Returns true if the device supports VK_KHR_push_descriptor.
215 bool IsKhrPushDescriptorSupported() const { 368 bool IsKhrPushDescriptorSupported() const {
216 return khr_push_descriptor; 369 return extensions.push_descriptor;
217 } 370 }
218 371
219 /// Returns true if VK_KHR_pipeline_executable_properties is enabled. 372 /// Returns true if VK_KHR_pipeline_executable_properties is enabled.
220 bool IsKhrPipelineExecutablePropertiesEnabled() const { 373 bool IsKhrPipelineExecutablePropertiesEnabled() const {
221 return khr_pipeline_executable_properties; 374 return extensions.pipeline_executable_properties;
222 } 375 }
223 376
224 /// Returns true if VK_KHR_swapchain_mutable_format is enabled. 377 /// Returns true if VK_KHR_swapchain_mutable_format is enabled.
225 bool IsKhrSwapchainMutableFormatEnabled() const { 378 bool IsKhrSwapchainMutableFormatEnabled() const {
226 return khr_swapchain_mutable_format; 379 return extensions.swapchain_mutable_format;
227 } 380 }
228 381
229 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. 382 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
230 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { 383 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
231 return khr_workgroup_memory_explicit_layout; 384 return extensions.workgroup_memory_explicit_layout;
232 } 385 }
233 386
234 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. 387 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
235 bool IsTopologyListPrimitiveRestartSupported() const { 388 bool IsTopologyListPrimitiveRestartSupported() const {
236 return is_topology_list_restart_supported; 389 return features.primitive_topology_list_restart.primitiveTopologyListRestart;
237 } 390 }
238 391
239 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. 392 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
240 bool IsPatchListPrimitiveRestartSupported() const { 393 bool IsPatchListPrimitiveRestartSupported() const {
241 return is_patch_list_restart_supported; 394 return features.primitive_topology_list_restart.primitiveTopologyPatchListRestart;
242 } 395 }
243 396
244 /// Returns true if the device supports VK_EXT_index_type_uint8. 397 /// Returns true if the device supports VK_EXT_index_type_uint8.
245 bool IsExtIndexTypeUint8Supported() const { 398 bool IsExtIndexTypeUint8Supported() const {
246 return ext_index_type_uint8; 399 return extensions.index_type_uint8;
247 } 400 }
248 401
249 /// Returns true if the device supports VK_EXT_sampler_filter_minmax. 402 /// Returns true if the device supports VK_EXT_sampler_filter_minmax.
250 bool IsExtSamplerFilterMinmaxSupported() const { 403 bool IsExtSamplerFilterMinmaxSupported() const {
251 return ext_sampler_filter_minmax; 404 return extensions.sampler_filter_minmax;
252 } 405 }
253 406
254 /// Returns true if the device supports VK_EXT_depth_range_unrestricted. 407 /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
255 bool IsExtDepthRangeUnrestrictedSupported() const { 408 bool IsExtDepthRangeUnrestrictedSupported() const {
256 return ext_depth_range_unrestricted; 409 return extensions.depth_range_unrestricted;
257 } 410 }
258 411
259 /// Returns true if the device supports VK_EXT_depth_clip_control. 412 /// Returns true if the device supports VK_EXT_depth_clip_control.
260 bool IsExtDepthClipControlSupported() const { 413 bool IsExtDepthClipControlSupported() const {
261 return ext_depth_clip_control; 414 return extensions.depth_clip_control;
262 } 415 }
263 416
264 /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. 417 /// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
265 bool IsExtShaderViewportIndexLayerSupported() const { 418 bool IsExtShaderViewportIndexLayerSupported() const {
266 return ext_shader_viewport_index_layer; 419 return extensions.shader_viewport_index_layer;
267 } 420 }
268 421
269 /// Returns true if the device supports VK_EXT_subgroup_size_control. 422 /// Returns true if the device supports VK_EXT_subgroup_size_control.
270 bool IsExtSubgroupSizeControlSupported() const { 423 bool IsExtSubgroupSizeControlSupported() const {
271 return ext_subgroup_size_control; 424 return extensions.subgroup_size_control;
272 } 425 }
273 426
274 /// Returns true if the device supports VK_EXT_transform_feedback. 427 /// Returns true if the device supports VK_EXT_transform_feedback.
275 bool IsExtTransformFeedbackSupported() const { 428 bool IsExtTransformFeedbackSupported() const {
276 return ext_transform_feedback; 429 return extensions.transform_feedback;
277 } 430 }
278 431
279 /// Returns true if the device supports VK_EXT_custom_border_color. 432 /// Returns true if the device supports VK_EXT_custom_border_color.
280 bool IsExtCustomBorderColorSupported() const { 433 bool IsExtCustomBorderColorSupported() const {
281 return ext_custom_border_color; 434 return extensions.custom_border_color;
282 } 435 }
283 436
284 /// Returns true if the device supports VK_EXT_extended_dynamic_state. 437 /// Returns true if the device supports VK_EXT_extended_dynamic_state.
285 bool IsExtExtendedDynamicStateSupported() const { 438 bool IsExtExtendedDynamicStateSupported() const {
286 return ext_extended_dynamic_state; 439 return extensions.extended_dynamic_state;
287 } 440 }
288 441
289 /// Returns true if the device supports VK_EXT_extended_dynamic_state2. 442 /// Returns true if the device supports VK_EXT_extended_dynamic_state2.
290 bool IsExtExtendedDynamicState2Supported() const { 443 bool IsExtExtendedDynamicState2Supported() const {
291 return ext_extended_dynamic_state_2; 444 return extensions.extended_dynamic_state2;
292 } 445 }
293 446
294 bool IsExtExtendedDynamicState2ExtrasSupported() const { 447 bool IsExtExtendedDynamicState2ExtrasSupported() const {
295 return ext_extended_dynamic_state_2_extra; 448 return features.extended_dynamic_state2.extendedDynamicState2LogicOp;
296 } 449 }
297 450
298 /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 451 /// Returns true if the device supports VK_EXT_extended_dynamic_state3.
299 bool IsExtExtendedDynamicState3Supported() const { 452 bool IsExtExtendedDynamicState3Supported() const {
300 return ext_extended_dynamic_state_3; 453 return extensions.extended_dynamic_state3;
301 } 454 }
302 455
303 /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 456 /// Returns true if the device supports VK_EXT_extended_dynamic_state3.
304 bool IsExtExtendedDynamicState3BlendingSupported() const { 457 bool IsExtExtendedDynamicState3BlendingSupported() const {
305 return ext_extended_dynamic_state_3_blend; 458 return dynamic_state3_blending;
306 } 459 }
307 460
308 /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 461 /// Returns true if the device supports VK_EXT_extended_dynamic_state3.
309 bool IsExtExtendedDynamicState3EnablesSupported() const { 462 bool IsExtExtendedDynamicState3EnablesSupported() const {
310 return ext_extended_dynamic_state_3_enables; 463 return dynamic_state3_enables;
311 } 464 }
312 465
313 /// Returns true if the device supports VK_EXT_line_rasterization. 466 /// Returns true if the device supports VK_EXT_line_rasterization.
314 bool IsExtLineRasterizationSupported() const { 467 bool IsExtLineRasterizationSupported() const {
315 return ext_line_rasterization; 468 return extensions.line_rasterization;
316 } 469 }
317 470
318 /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. 471 /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
319 bool IsExtVertexInputDynamicStateSupported() const { 472 bool IsExtVertexInputDynamicStateSupported() const {
320 return ext_vertex_input_dynamic_state; 473 return extensions.vertex_input_dynamic_state;
321 } 474 }
322 475
323 /// Returns true if the device supports VK_EXT_shader_stencil_export. 476 /// Returns true if the device supports VK_EXT_shader_stencil_export.
324 bool IsExtShaderStencilExportSupported() const { 477 bool IsExtShaderStencilExportSupported() const {
325 return ext_shader_stencil_export; 478 return extensions.shader_stencil_export;
326 } 479 }
327 480
328 /// Returns true if the device supports VK_EXT_conservative_rasterization. 481 /// Returns true if the device supports VK_EXT_conservative_rasterization.
329 bool IsExtConservativeRasterizationSupported() const { 482 bool IsExtConservativeRasterizationSupported() const {
330 return ext_conservative_rasterization; 483 return extensions.conservative_rasterization;
331 } 484 }
332 485
333 /// Returns true if the device supports VK_EXT_provoking_vertex. 486 /// Returns true if the device supports VK_EXT_provoking_vertex.
334 bool IsExtProvokingVertexSupported() const { 487 bool IsExtProvokingVertexSupported() const {
335 return ext_provoking_vertex; 488 return extensions.provoking_vertex;
336 } 489 }
337 490
338 /// Returns true if the device supports VK_KHR_shader_atomic_int64. 491 /// Returns true if the device supports VK_KHR_shader_atomic_int64.
339 bool IsExtShaderAtomicInt64Supported() const { 492 bool IsExtShaderAtomicInt64Supported() const {
340 return ext_shader_atomic_int64; 493 return extensions.shader_atomic_int64;
341 } 494 }
342 495
343 /// Returns the minimum supported version of SPIR-V. 496 /// Returns the minimum supported version of SPIR-V.
@@ -345,7 +498,7 @@ public:
345 if (instance_version >= VK_API_VERSION_1_3) { 498 if (instance_version >= VK_API_VERSION_1_3) {
346 return 0x00010600U; 499 return 0x00010600U;
347 } 500 }
348 if (khr_spirv_1_4) { 501 if (extensions.spirv_1_4) {
349 return 0x00010400U; 502 return 0x00010400U;
350 } 503 }
351 return 0x00010000U; 504 return 0x00010000U;
@@ -363,11 +516,11 @@ public:
363 516
364 /// Returns the vendor name reported from Vulkan. 517 /// Returns the vendor name reported from Vulkan.
365 std::string_view GetVendorName() const { 518 std::string_view GetVendorName() const {
366 return vendor_name; 519 return properties.driver.driverName;
367 } 520 }
368 521
369 /// Returns the list of available extensions. 522 /// Returns the list of available extensions.
370 const std::vector<std::string>& GetAvailableExtensions() const { 523 const std::set<std::string, std::less<>>& GetAvailableExtensions() const {
371 return supported_extensions; 524 return supported_extensions;
372 } 525 }
373 526
@@ -376,7 +529,7 @@ public:
376 } 529 }
377 530
378 bool CanReportMemoryUsage() const { 531 bool CanReportMemoryUsage() const {
379 return ext_memory_budget; 532 return extensions.memory_budget;
380 } 533 }
381 534
382 u64 GetDeviceMemoryUsage() const; 535 u64 GetDeviceMemoryUsage() const;
@@ -397,33 +550,30 @@ public:
397 return must_emulate_bgr565; 550 return must_emulate_bgr565;
398 } 551 }
399 552
553 bool HasNullDescriptor() const {
554 return features.robustness2.nullDescriptor;
555 }
556
400 u32 GetMaxVertexInputAttributes() const { 557 u32 GetMaxVertexInputAttributes() const {
401 return max_vertex_input_attributes; 558 return properties.properties.limits.maxVertexInputAttributes;
402 } 559 }
403 560
404 u32 GetMaxVertexInputBindings() const { 561 u32 GetMaxVertexInputBindings() const {
405 return max_vertex_input_bindings; 562 return properties.properties.limits.maxVertexInputBindings;
406 } 563 }
407 564
408private: 565private:
409 /// Checks if the physical device is suitable. 566 /// Checks if the physical device is suitable and configures the object state
410 void CheckSuitability(bool requires_swapchain) const; 567 /// with all necessary info about its properties.
568 bool GetSuitability(bool requires_swapchain);
411 569
412 /// Loads extensions into a vector and stores available ones in this object. 570 // Remove extensions which have incomplete feature support.
413 std::vector<const char*> LoadExtensions(bool requires_surface); 571 void RemoveUnsuitableExtensions();
572 void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
414 573
415 /// Sets up queue families. 574 /// Sets up queue families.
416 void SetupFamilies(VkSurfaceKHR surface); 575 void SetupFamilies(VkSurfaceKHR surface);
417 576
418 /// Sets up device features.
419 void SetupFeatures();
420
421 /// Sets up device properties.
422 void SetupProperties();
423
424 /// Collects telemetry information from the device.
425 void CollectTelemetryParameters();
426
427 /// Collects information about attached tools. 577 /// Collects information about attached tools.
428 void CollectToolingInfo(); 578 void CollectToolingInfo();
429 579
@@ -434,90 +584,93 @@ private:
434 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 584 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
435 585
436 /// Returns true if ASTC textures are natively supported. 586 /// Returns true if ASTC textures are natively supported.
437 bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; 587 bool ComputeIsOptimalAstcSupported() const;
438 588
439 /// Returns true if the device natively supports blitting depth stencil images. 589 /// Returns true if the device natively supports blitting depth stencil images.
440 bool TestDepthStencilBlits() const; 590 bool TestDepthStencilBlits() const;
441 591
442 VkInstance instance; ///< Vulkan instance. 592private:
443 vk::DeviceDispatch dld; ///< Device function pointers. 593 VkInstance instance; ///< Vulkan instance.
444 vk::PhysicalDevice physical; ///< Physical device. 594 vk::DeviceDispatch dld; ///< Device function pointers.
445 VkPhysicalDeviceProperties properties; ///< Device properties. 595 vk::PhysicalDevice physical; ///< Physical device.
446 VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. 596 vk::Device logical; ///< Logical device.
447 vk::Device logical; ///< Logical device. 597 vk::Queue graphics_queue; ///< Main graphics queue.
448 vk::Queue graphics_queue; ///< Main graphics queue. 598 vk::Queue present_queue; ///< Main present queue.
449 vk::Queue present_queue; ///< Main present queue. 599 u32 instance_version{}; ///< Vulkan instance version.
450 u32 instance_version{}; ///< Vulkan instance version. 600 u32 graphics_family{}; ///< Main graphics queue family index.
451 u32 graphics_family{}; ///< Main graphics queue family index. 601 u32 present_family{}; ///< Main present queue family index.
452 u32 present_family{}; ///< Main present queue family index. 602
453 VkDriverIdKHR driver_id{}; ///< Driver ID. 603 struct Extensions {
454 VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 604#define EXTENSION(prefix, macro_name, var_name) bool var_name{};
455 u64 device_access_memory{}; ///< Total size of device local memory in bytes. 605#define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{};
456 u32 max_push_descriptors{}; ///< Maximum number of push descriptors 606
457 u32 sets_per_pool{}; ///< Sets per Descriptor Pool 607 FOR_EACH_VK_FEATURE_1_1(FEATURE);
458 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 608 FOR_EACH_VK_FEATURE_1_2(FEATURE);
459 bool is_float16_supported{}; ///< Support for float16 arithmetic. 609 FOR_EACH_VK_FEATURE_1_3(FEATURE);
460 bool is_int8_supported{}; ///< Support for int8 arithmetic. 610 FOR_EACH_VK_FEATURE_EXT(FEATURE);
461 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 611 FOR_EACH_VK_EXTENSION(EXTENSION);
462 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 612 FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
463 bool is_depth_bounds_supported{}; ///< Support for depth bounds. 613
464 bool is_shader_float64_supported{}; ///< Support for float64. 614#undef EXTENSION
465 bool is_shader_int64_supported{}; ///< Support for int64. 615#undef FEATURE
466 bool is_shader_int16_supported{}; ///< Support for int16. 616 };
467 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. 617
468 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. 618 struct Features {
469 bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list 619#define FEATURE_CORE(prefix, struct_name, macro_name, var_name) \
470 ///< topologies. 620 VkPhysicalDevice##struct_name##Features var_name{};
471 bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. 621#define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \
472 bool is_integrated{}; ///< Is GPU an iGPU. 622 VkPhysicalDevice##struct_name##Features##prefix var_name{};
473 bool is_virtual{}; ///< Is GPU a virtual GPU. 623
474 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. 624 FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE);
475 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 625 FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE);
476 bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. 626 FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE);
477 bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. 627 FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT);
478 bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. 628
479 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. 629#undef FEATURE_CORE
480 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. 630#undef FEATURE_EXT
481 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. 631
482 bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor. 632 VkPhysicalDeviceFeatures features{};
483 bool khr_pipeline_executable_properties{}; ///< Support for executable properties. 633 };
484 bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. 634
485 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 635 struct Properties {
486 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 636 VkPhysicalDeviceDriverProperties driver{};
487 bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control 637 VkPhysicalDeviceFloatControlsProperties float_controls{};
488 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 638 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
489 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 639 VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
490 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. 640 VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
491 bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. 641
492 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 642 VkPhysicalDeviceProperties properties{};
493 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 643 };
494 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 644
495 bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. 645 Extensions extensions{};
496 bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. 646 Features features{};
497 bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. 647 Properties properties{};
498 bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. 648
499 bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. 649 VkPhysicalDeviceFeatures2 features2{};
500 bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. 650 VkPhysicalDeviceProperties2 properties2{};
501 bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. 651
502 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 652 // Misc features
503 bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. 653 bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats.
504 bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. 654 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
505 bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. 655 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
506 bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. 656 bool is_integrated{}; ///< Is GPU an iGPU.
507 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 657 bool is_virtual{}; ///< Is GPU a virtual GPU.
508 bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit 658 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
509 bool has_renderdoc{}; ///< Has RenderDoc attached 659 bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
510 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 660 bool has_renderdoc{}; ///< Has RenderDoc attached
511 bool supports_d24_depth{}; ///< Supports D24 depth buffers. 661 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
512 bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. 662 bool supports_d24_depth{}; ///< Supports D24 depth buffers.
513 bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. 663 bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
514 u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline 664 bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
515 u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline 665 bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
666 bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
667 u64 device_access_memory{}; ///< Total size of device local memory in bytes.
668 u32 sets_per_pool{}; ///< Sets per Description Pool
516 669
517 // Telemetry parameters 670 // Telemetry parameters
518 std::string vendor_name; ///< Device's driver name. 671 std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
519 std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. 672 std::set<std::string, std::less<>> loaded_extensions; ///< Loaded Vulkan extensions.
520 std::vector<size_t> valid_heap_memory; ///< Heaps used. 673 std::vector<size_t> valid_heap_memory; ///< Heaps used.
521 674
522 /// Format properties dictionary. 675 /// Format properties dictionary.
523 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 676 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 562039b56..b6d83e446 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -32,7 +32,7 @@
32namespace Vulkan { 32namespace Vulkan {
33namespace { 33namespace {
34[[nodiscard]] std::vector<const char*> RequiredExtensions( 34[[nodiscard]] std::vector<const char*> RequiredExtensions(
35 Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { 35 Core::Frontend::WindowSystemType window_type, bool enable_validation) {
36 std::vector<const char*> extensions; 36 std::vector<const char*> extensions;
37 extensions.reserve(6); 37 extensions.reserve(6);
38 switch (window_type) { 38 switch (window_type) {
@@ -65,7 +65,7 @@ namespace {
65 if (window_type != Core::Frontend::WindowSystemType::Headless) { 65 if (window_type != Core::Frontend::WindowSystemType::Headless) {
66 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); 66 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
67 } 67 }
68 if (enable_debug_utils) { 68 if (enable_validation) {
69 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); 69 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
70 } 70 }
71 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); 71 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -95,9 +95,9 @@ namespace {
95 return true; 95 return true;
96} 96}
97 97
98[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { 98[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) {
99 std::vector<const char*> layers; 99 std::vector<const char*> layers;
100 if (enable_layers) { 100 if (enable_validation) {
101 layers.push_back("VK_LAYER_KHRONOS_validation"); 101 layers.push_back("VK_LAYER_KHRONOS_validation");
102 } 102 }
103 return layers; 103 return layers;
@@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const
125 125
126vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, 126vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
127 u32 required_version, Core::Frontend::WindowSystemType window_type, 127 u32 required_version, Core::Frontend::WindowSystemType window_type,
128 bool enable_debug_utils, bool enable_layers) { 128 bool enable_validation) {
129 if (!library.IsOpen()) { 129 if (!library.IsOpen()) {
130 LOG_ERROR(Render_Vulkan, "Vulkan library not available"); 130 LOG_ERROR(Render_Vulkan, "Vulkan library not available");
131 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); 131 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
@@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
138 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); 138 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
139 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); 139 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
140 } 140 }
141 const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); 141 const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation);
142 if (!AreExtensionsSupported(dld, extensions)) { 142 if (!AreExtensionsSupported(dld, extensions)) {
143 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); 143 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
144 } 144 }
145 std::vector<const char*> layers = Layers(enable_layers); 145 std::vector<const char*> layers = Layers(enable_validation);
146 RemoveUnavailableLayers(dld, layers); 146 RemoveUnavailableLayers(dld, layers);
147 147
148 const u32 available_version = vk::AvailableVersion(dld); 148 const u32 available_version = vk::AvailableVersion(dld);
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
index 40419d802..b59b92f83 100644
--- a/src/video_core/vulkan_common/vulkan_instance.h
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -17,8 +17,7 @@ namespace Vulkan {
17 * @param dld Dispatch table to load function pointers into 17 * @param dld Dispatch table to load function pointers into
18 * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) 18 * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1)
19 * @param window_type Window system type's enabled extension 19 * @param window_type Window system type's enabled extension
20 * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not 20 * @param enable_validation Whether to enable Vulkan validation layers or not
21 * @param enable_layers Whether to enable Vulkan validation layers or not
22 * 21 *
23 * @return A new Vulkan instance 22 * @return A new Vulkan instance
24 * @throw vk::Exception on failure 23 * @throw vk::Exception on failure
@@ -26,6 +25,6 @@ namespace Vulkan {
26[[nodiscard]] vk::Instance CreateInstance( 25[[nodiscard]] vk::Instance CreateInstance(
27 const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, 26 const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
28 Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, 27 Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
29 bool enable_debug_utils = false, bool enable_layers = false); 28 bool enable_validation = false);
30 29
31} // namespace Vulkan 30} // namespace Vulkan
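
A hedged usage sketch of the simplified signature: the former enable_debug_utils/enable_layers pair collapses into one enable_validation flag that switches on both VK_EXT_debug_utils and VK_LAYER_KHRONOS_validation. OpenLibrary is an assumed helper for loading the Vulkan loader, not shown in this patch:

    vk::InstanceDispatch dld;
    Common::DynamicLibrary library = OpenLibrary(); // assumed helper
    vk::Instance instance = Vulkan::CreateInstance(
        library, dld, VK_API_VERSION_1_1,
        Core::Frontend::WindowSystemType::Windows,
        /*enable_validation=*/true);
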
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 861767c13..486d4dfaf 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -96,8 +96,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdDrawIndexed);
     X(vkCmdDrawIndirect);
     X(vkCmdDrawIndexedIndirect);
-    X(vkCmdDrawIndirectCountKHR);
-    X(vkCmdDrawIndexedIndirectCountKHR);
+    X(vkCmdDrawIndirectCount);
+    X(vkCmdDrawIndexedIndirectCount);
     X(vkCmdEndQuery);
     X(vkCmdEndRenderPass);
     X(vkCmdEndTransformFeedbackEXT);
@@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCreateGraphicsPipelines);
     X(vkCreateImage);
     X(vkCreateImageView);
+    X(vkCreatePipelineCache);
     X(vkCreatePipelineLayout);
     X(vkCreateQueryPool);
     X(vkCreateRenderPass);
@@ -171,6 +172,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkDestroyImage);
     X(vkDestroyImageView);
     X(vkDestroyPipeline);
+    X(vkDestroyPipelineCache);
     X(vkDestroyPipelineLayout);
     X(vkDestroyQueryPool);
     X(vkDestroyRenderPass);
@@ -188,6 +190,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkGetEventStatus);
     X(vkGetFenceStatus);
     X(vkGetImageMemoryRequirements);
+    X(vkGetPipelineCacheData);
     X(vkGetMemoryFdKHR);
 #ifdef _WIN32
     X(vkGetMemoryWin32HandleKHR);
@@ -218,6 +221,12 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     if (!dld.vkResetQueryPool) {
         Proc(dld.vkResetQueryPool, dld, "vkResetQueryPoolEXT", device);
     }
+
+    // Support for draw indirect with count is optional in Vulkan 1.2
+    if (!dld.vkCmdDrawIndirectCount) {
+        Proc(dld.vkCmdDrawIndirectCount, dld, "vkCmdDrawIndirectCountKHR", device);
+        Proc(dld.vkCmdDrawIndexedIndirectCount, dld, "vkCmdDrawIndexedIndirectCountKHR", device);
+    }
 #undef X
 }
 
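This hunk is the compatibility half of the KHR-suffix removal above: vkCmdDrawIndirectCount was promoted to core in Vulkan 1.2, but a 1.1 driver only exports the VK_KHR_draw_indirect_count spelling, so the loader retries with the KHR alias when the core name comes back null. The same promoted-entry-point pattern, sketched generically (LoadWithFallback is a made-up helper, not part of the diff):

    // Try the core name first, then the extension alias it was promoted
    // from. Proc() is the wrapper's existing vkGetDeviceProcAddr helper.
    template <typename T>
    void LoadWithFallback(T& pfn, const DeviceDispatch& dld, VkDevice device,
                          const char* core_name, const char* alias_name) {
        Proc(pfn, dld, core_name, device);
        if (!pfn) {
            Proc(pfn, dld, alias_name, device);
        }
    }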
@@ -431,6 +440,10 @@ void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noex
     dld.vkDestroyPipeline(device, handle, nullptr);
 }
 
+void Destroy(VkDevice device, VkPipelineCache handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyPipelineCache(device, handle, nullptr);
+}
+
 void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept {
     dld.vkDestroyPipelineLayout(device, handle, nullptr);
 }
@@ -651,6 +664,10 @@ void ShaderModule::SetObjectNameEXT(const char* name) const {
     SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
 }
 
+void PipelineCache::SetObjectNameEXT(const char* name) const {
+    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_PIPELINE_CACHE, name);
+}
+
 void Semaphore::SetObjectNameEXT(const char* name) const {
     SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
 }
@@ -746,21 +763,29 @@ DescriptorSetLayout Device::CreateDescriptorSetLayout(
     return DescriptorSetLayout(object, handle, *dld);
 }
 
+PipelineCache Device::CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const {
+    VkPipelineCache cache;
+    Check(dld->vkCreatePipelineCache(handle, &ci, nullptr, &cache));
+    return PipelineCache(cache, handle, *dld);
+}
+
 PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
     VkPipelineLayout object;
     Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object));
     return PipelineLayout(object, handle, *dld);
 }
 
-Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const {
+Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci,
+                                        VkPipelineCache cache) const {
     VkPipeline object;
-    Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object));
+    Check(dld->vkCreateGraphicsPipelines(handle, cache, 1, &ci, nullptr, &object));
     return Pipeline(object, handle, *dld);
 }
 
-Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const {
+Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci,
+                                       VkPipelineCache cache) const {
     VkPipeline object;
-    Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object));
+    Check(dld->vkCreateComputePipelines(handle, cache, 1, &ci, nullptr, &object));
     return Pipeline(object, handle, *dld);
 }
 
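The new cache parameter defaults to VK_NULL_HANDLE (see the header below), so existing callers compile unchanged while the pipeline caches can opt in. A sketch of the intended use, assuming the wrapper's usual operator* to obtain the raw handle:

    // One cache per device; every pipeline built through it lets the
    // driver reuse previously compiled state. Illustrative only.
    const VkPipelineCacheCreateInfo cache_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .initialDataSize = 0, // size of a previously saved blob, if any
        .pInitialData = nullptr,
    };
    const vk::PipelineCache pipeline_cache = device.CreatePipelineCache(cache_ci);
    const vk::Pipeline pipeline = device.CreateGraphicsPipeline(pipeline_ci, *pipeline_cache);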
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index accfad8c1..e86f661cb 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -215,8 +215,8 @@ struct DeviceDispatch : InstanceDispatch {
     PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
     PFN_vkCmdDrawIndirect vkCmdDrawIndirect{};
     PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
-    PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{};
-    PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{};
+    PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
+    PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
     PFN_vkCmdEndQuery vkCmdEndQuery{};
     PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
@@ -270,6 +270,7 @@ struct DeviceDispatch : InstanceDispatch {
     PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{};
     PFN_vkCreateImage vkCreateImage{};
     PFN_vkCreateImageView vkCreateImageView{};
+    PFN_vkCreatePipelineCache vkCreatePipelineCache{};
     PFN_vkCreatePipelineLayout vkCreatePipelineLayout{};
     PFN_vkCreateQueryPool vkCreateQueryPool{};
     PFN_vkCreateRenderPass vkCreateRenderPass{};
@@ -289,6 +290,7 @@ struct DeviceDispatch : InstanceDispatch {
     PFN_vkDestroyImage vkDestroyImage{};
     PFN_vkDestroyImageView vkDestroyImageView{};
     PFN_vkDestroyPipeline vkDestroyPipeline{};
+    PFN_vkDestroyPipelineCache vkDestroyPipelineCache{};
     PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{};
     PFN_vkDestroyQueryPool vkDestroyQueryPool{};
     PFN_vkDestroyRenderPass vkDestroyRenderPass{};
@@ -306,6 +308,7 @@ struct DeviceDispatch : InstanceDispatch {
     PFN_vkGetEventStatus vkGetEventStatus{};
     PFN_vkGetFenceStatus vkGetFenceStatus{};
     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
+    PFN_vkGetPipelineCacheData vkGetPipelineCacheData{};
     PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
 #ifdef _WIN32
     PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
@@ -351,6 +354,7 @@ void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkPipelineCache, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept;
@@ -773,6 +777,18 @@ public:
     void SetObjectNameEXT(const char* name) const;
 };
 
+class PipelineCache : public Handle<VkPipelineCache, VkDevice, DeviceDispatch> {
+    using Handle<VkPipelineCache, VkDevice, DeviceDispatch>::Handle;
+
+public:
+    /// Set object name.
+    void SetObjectNameEXT(const char* name) const;
+
+    VkResult Read(size_t* size, void* data) const noexcept {
+        return dld->vkGetPipelineCacheData(owner, handle, size, data);
+    }
+};
+
 class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
     using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
 
@@ -844,11 +860,15 @@ public:
 
     DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const;
 
+    PipelineCache CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const;
+
     PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const;
 
-    Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const;
+    Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci,
+                                    VkPipelineCache cache = nullptr) const;
 
-    Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const;
+    Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci,
+                                   VkPipelineCache cache = nullptr) const;
 
     Sampler CreateSampler(const VkSamplerCreateInfo& ci) const;
 
@@ -1045,15 +1065,15 @@ public:
 
     void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer,
                            VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept {
-        dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset,
-                                       draw_count, stride);
+        dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset,
+                                    draw_count, stride);
     }
 
     void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset,
                                   VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count,
                                   u32 stride) const noexcept {
-        dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer,
-                                              count_offset, draw_count, stride);
+        dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer,
+                                           count_offset, draw_count, stride);
     }
 
     void ClearAttachments(Span<VkClearAttachment> attachments,
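PipelineCache::Read is a thin shim over vkGetPipelineCacheData, which follows Vulkan's usual two-call idiom: pass a null buffer to learn the size, then call again to fetch the blob. A hedged sketch of serializing the cache (pipeline_cache is a hypothetical vk::PipelineCache):

    // Two-call pattern: first query the size, then retrieve the data.
    size_t size = 0;
    if (pipeline_cache.Read(&size, nullptr) == VK_SUCCESS) {
        std::vector<char> data(size);
        if (pipeline_cache.Read(&size, data.data()) == VK_SUCCESS) {
            // Persist `data`, e.g. to the per-title vulkan_pipelines.bin
            // removed by the main.cpp hunk further down.
        }
    }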
diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist
index 0eb377926..f05f3186c 100644
--- a/src/yuzu/Info.plist
+++ b/src/yuzu/Info.plist
@@ -34,6 +34,8 @@ SPDX-License-Identifier: GPL-2.0-or-later
     <string></string>
     <key>CSResourcesFileMapped</key>
     <true/>
+    <key>LSApplicationCategoryType</key>
+    <string>public.app-category.games</string>
     <key>LSRequiresCarbon</key>
     <true/>
     <key>NSHumanReadableCopyright</key>
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 3d560f303..d65991734 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -96,9 +96,9 @@ void EmuThread::run() {
             m_is_running.store(false);
             m_is_running.notify_all();
 
-            emit DebugModeEntered();
+            EmulationPaused(lk);
             Common::CondvarWait(m_should_run_cv, lk, stop_token, [&] { return m_should_run; });
-            emit DebugModeLeft();
+            EmulationResumed(lk);
         }
     }
 
@@ -111,6 +111,21 @@ void EmuThread::run() {
 #endif
 }
 
+// Unlock while emitting signals so that the main thread can
+// continue pumping events.
+
+void EmuThread::EmulationPaused(std::unique_lock<std::mutex>& lk) {
+    lk.unlock();
+    emit DebugModeEntered();
+    lk.lock();
+}
+
+void EmuThread::EmulationResumed(std::unique_lock<std::mutex>& lk) {
+    lk.unlock();
+    emit DebugModeLeft();
+    lk.lock();
+}
+
 #ifdef HAS_OPENGL
 class OpenGLSharedContext : public Core::Frontend::GraphicsContext {
 public:
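The point of the two helpers: emitting a Qt signal while the lock guarding m_should_run is held can deadlock, because a slot connected with Qt::DirectConnection (or a blocking queued connection back to this thread) may try to take the same lock inside the emit. Dropping the lock around the emit and retaking it before CondvarWait keeps the wait condition race-free. The hazard in miniature (illustrative, not from the diff):

    std::mutex m;
    std::unique_lock lk{m};
    // emit StateChanged();  // direct-connected slot runs here, inside
    //                       // the lock; if it locks m -> deadlock
    lk.unlock();
    // emit StateChanged();  // safe: the slot may lock m freely
    lk.lock();               // reacquire before waiting on the condvar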
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index eca16b313..092c6206f 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -92,6 +92,10 @@ public:
     }
 
 private:
+    void EmulationPaused(std::unique_lock<std::mutex>& lk);
+    void EmulationResumed(std::unique_lock<std::mutex>& lk);
+
+private:
     Core::System& m_system;
 
     std::stop_source m_stop_source;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9425b5bd..0db62baa3 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -690,6 +690,7 @@ void Config::ReadRendererValues() {
     qt_config->beginGroup(QStringLiteral("Renderer"));
 
     ReadGlobalSetting(Settings::values.renderer_backend);
+    ReadGlobalSetting(Settings::values.renderer_force_max_clock);
     ReadGlobalSetting(Settings::values.vulkan_device);
     ReadGlobalSetting(Settings::values.fullscreen_mode);
     ReadGlobalSetting(Settings::values.aspect_ratio);
@@ -709,6 +710,7 @@ void Config::ReadRendererValues() {
     ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
     ReadGlobalSetting(Settings::values.use_fast_gpu_time);
     ReadGlobalSetting(Settings::values.use_pessimistic_flushes);
+    ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
     ReadGlobalSetting(Settings::values.bg_red);
     ReadGlobalSetting(Settings::values.bg_green);
     ReadGlobalSetting(Settings::values.bg_blue);
@@ -1305,6 +1307,9 @@ void Config::SaveRendererValues() {
                  static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
                  static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
                  Settings::values.renderer_backend.UsingGlobal());
+    WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()),
+                 static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)),
+                 static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault()));
     WriteGlobalSetting(Settings::values.vulkan_device);
     WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
                  static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)),
@@ -1348,6 +1353,7 @@ void Config::SaveRendererValues() {
     WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
     WriteGlobalSetting(Settings::values.use_fast_gpu_time);
     WriteGlobalSetting(Settings::values.use_pessimistic_flushes);
+    WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
     WriteGlobalSetting(Settings::values.bg_red);
     WriteGlobalSetting(Settings::values.bg_green);
     WriteGlobalSetting(Settings::values.bg_blue);
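A recurring footgun these hunks illustrate: every new SwitchableSetting must be wired into both the read and the save path, otherwise its value silently reverts to the default on the next launch. The symmetric pair, sketched for a hypothetical setting:

    // use_example_feature is hypothetical; the pattern is what matters.
    ReadGlobalSetting(Settings::values.use_example_feature);  // in ReadRendererValues()
    WriteGlobalSetting(Settings::values.use_example_feature); // in SaveRendererValues()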
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index aa02cc63c..bb9910a53 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -366,6 +366,11 @@
       </item>
       <item>
        <property name="text">
+        <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string>
+       </property>
+      </item>
+      <item>
+       <property name="text">
         <string>2X (1440p/2160p)</string>
        </property>
       </item>
@@ -389,6 +394,16 @@
         <string>6X (4320p/6480p)</string>
        </property>
       </item>
+      <item>
+       <property name="text">
+        <string>7X (5040p/7560p)</string>
+       </property>
+      </item>
+      <item>
+       <property name="text">
+        <string>8X (5760p/8640p)</string>
+       </property>
+      </item>
      </widget>
     </item>
    </layout>
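The new combo-box entries follow the same arithmetic as the existing ones: each label multiplies the console's 720p handheld and 1080p docked base resolutions. A quick check of the added labels:

    // Base resolutions are 720p handheld / 1080p docked.
    constexpr int handheld = 720;
    constexpr int docked = 1080;
    static_assert(handheld * 3 / 2 == 1080 && docked * 3 / 2 == 1620); // 1.5X
    static_assert(handheld * 7 == 5040 && docked * 7 == 7560);         // 7X
    static_assert(handheld * 8 == 5760 && docked * 8 == 8640);         // 8X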
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 01f074699..cc0155a2c 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -22,13 +22,17 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
 void ConfigureGraphicsAdvanced::SetConfiguration() {
     const bool runtime_lock = !system.IsPoweredOn();
     ui->use_vsync->setEnabled(runtime_lock);
+    ui->renderer_force_max_clock->setEnabled(runtime_lock);
     ui->use_asynchronous_shaders->setEnabled(runtime_lock);
     ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
 
+    ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
     ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
     ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
     ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
     ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue());
+    ui->use_vulkan_driver_pipeline_cache->setChecked(
+        Settings::values.use_vulkan_driver_pipeline_cache.GetValue());
 
     if (Settings::IsConfiguringGlobal()) {
         ui->gpu_accuracy->setCurrentIndex(
@@ -41,6 +45,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
                                           &Settings::values.max_anisotropy);
         ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
                                           !Settings::values.gpu_accuracy.UsingGlobal());
+        ConfigurationShared::SetHighlight(ui->renderer_force_max_clock,
+                                          !Settings::values.renderer_force_max_clock.UsingGlobal());
         ConfigurationShared::SetHighlight(ui->af_label,
                                           !Settings::values.max_anisotropy.UsingGlobal());
     }
@@ -48,6 +54,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
 
 void ConfigureGraphicsAdvanced::ApplyConfiguration() {
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
+    ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
+                                             ui->renderer_force_max_clock,
+                                             renderer_force_max_clock);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
                                              ui->anisotropic_filtering_combobox);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
@@ -58,6 +67,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
                                              ui->use_fast_gpu_time, use_fast_gpu_time);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes,
                                              ui->use_pessimistic_flushes, use_pessimistic_flushes);
+    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache,
+                                             ui->use_vulkan_driver_pipeline_cache,
+                                             use_vulkan_driver_pipeline_cache);
 }
 
 void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
@@ -76,18 +88,25 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
     // Disable if not global (only happens during game)
     if (Settings::IsConfiguringGlobal()) {
         ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
+        ui->renderer_force_max_clock->setEnabled(
+            Settings::values.renderer_force_max_clock.UsingGlobal());
         ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
         ui->use_asynchronous_shaders->setEnabled(
             Settings::values.use_asynchronous_shaders.UsingGlobal());
         ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
         ui->use_pessimistic_flushes->setEnabled(
             Settings::values.use_pessimistic_flushes.UsingGlobal());
+        ui->use_vulkan_driver_pipeline_cache->setEnabled(
+            Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal());
         ui->anisotropic_filtering_combobox->setEnabled(
             Settings::values.max_anisotropy.UsingGlobal());
 
         return;
     }
 
+    ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
+                                            Settings::values.renderer_force_max_clock,
+                                            renderer_force_max_clock);
     ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
     ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
                                             Settings::values.use_asynchronous_shaders,
@@ -97,6 +116,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
     ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes,
                                             Settings::values.use_pessimistic_flushes,
                                             use_pessimistic_flushes);
+    ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache,
+                                            Settings::values.use_vulkan_driver_pipeline_cache,
+                                            use_vulkan_driver_pipeline_cache);
     ConfigurationShared::SetColoredComboBox(
         ui->gpu_accuracy, ui->label_gpu_accuracy,
         static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index 12e816905..df557d585 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -36,10 +36,12 @@ private:
 
     std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
 
+    ConfigurationShared::CheckState renderer_force_max_clock;
     ConfigurationShared::CheckState use_vsync;
     ConfigurationShared::CheckState use_asynchronous_shaders;
     ConfigurationShared::CheckState use_fast_gpu_time;
     ConfigurationShared::CheckState use_pessimistic_flushes;
+    ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;
 
     const Core::System& system;
 };
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 87a121471..061885e30 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -70,6 +70,16 @@
    </widget>
   </item>
   <item>
+   <widget class="QCheckBox" name="renderer_force_max_clock">
+    <property name="toolTip">
+     <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
+    </property>
+    <property name="text">
+     <string>Force maximum clocks (Vulkan only)</string>
+    </property>
+   </widget>
+  </item>
+  <item>
    <widget class="QCheckBox" name="use_vsync">
     <property name="toolTip">
      <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string>
@@ -110,6 +120,16 @@
    </widget>
   </item>
   <item>
+   <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
+    <property name="toolTip">
+     <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
+    </property>
+    <property name="text">
+     <string>Use Vulkan pipeline cache</string>
+    </property>
+   </widget>
+  </item>
+  <item>
    <widget class="QWidget" name="af_layout" native="true">
     <layout class="QHBoxLayout" name="horizontalLayout_1">
      <property name="leftMargin">
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 183cbe562..c40d980c9 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -1466,6 +1466,12 @@ void ConfigureInputPlayer::mousePressEvent(QMouseEvent* event) {
     input_subsystem->GetMouse()->PressButton(0, 0, 0, 0, button);
 }
 
+void ConfigureInputPlayer::wheelEvent(QWheelEvent* event) {
+    const int x = event->angleDelta().x();
+    const int y = event->angleDelta().y();
+    input_subsystem->GetMouse()->MouseWheelChange(x, y);
+}
+
 void ConfigureInputPlayer::keyPressEvent(QKeyEvent* event) {
     if (!input_setter || !event) {
         return;
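For context on the values being forwarded: QWheelEvent::angleDelta() reports eighths of a degree, and a standard mouse wheel detent is 15 degrees, so one notch arrives as ±120. MouseWheelChange receives the raw units; a consumer that wants whole notches would divide (illustrative):

    // One wheel notch = 15 degrees = 120 angleDelta units.
    const int notches = event->angleDelta().y() / 120;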
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index 6d1876f2b..99a9c875d 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -116,6 +116,9 @@ private:
     /// Handle mouse button press events.
     void mousePressEvent(QMouseEvent* event) override;
 
+    /// Handle mouse wheel move events.
+    void wheelEvent(QWheelEvent* event) override;
+
     /// Handle key press events.
     void keyPressEvent(QKeyEvent* event) override;
 
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 524650144..571eacf9f 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -1839,9 +1839,11 @@ void GMainWindow::OnEmulationStopTimeExpired() {
 
 void GMainWindow::OnEmulationStopped() {
     shutdown_timer.stop();
-    emu_thread->disconnect();
-    emu_thread->wait();
-    emu_thread = nullptr;
+    if (emu_thread) {
+        emu_thread->disconnect();
+        emu_thread->wait();
+        emu_thread.reset();
+    }
 
     if (shutdown_dialog) {
         shutdown_dialog->deleteLater();
@@ -2229,8 +2231,10 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
     }
 
     switch (target) {
-    case GameListRemoveTarget::GlShaderCache:
     case GameListRemoveTarget::VkShaderCache:
+        RemoveVulkanDriverPipelineCache(program_id);
+        [[fallthrough]];
+    case GameListRemoveTarget::GlShaderCache:
         RemoveTransferableShaderCache(program_id, target);
         break;
     case GameListRemoveTarget::AllShaderCache:
@@ -2271,6 +2275,22 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTa
     }
 }
 
+void GMainWindow::RemoveVulkanDriverPipelineCache(u64 program_id) {
+    static constexpr std::string_view target_file_name = "vulkan_pipelines.bin";
+
+    const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
+    const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id);
+    const auto target_file = shader_cache_folder_path / target_file_name;
+
+    if (!Common::FS::Exists(target_file)) {
+        return;
+    }
+    if (!Common::FS::RemoveFile(target_file)) {
+        QMessageBox::warning(this, tr("Error Removing Vulkan Driver Pipeline Cache"),
+                             tr("Failed to remove the driver pipeline cache."));
+    }
+}
+
 void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) {
     const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
     const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id);
@@ -3011,6 +3031,8 @@ void GMainWindow::OnStopGame() {
 
     if (OnShutdownBegin()) {
         OnShutdownBeginDialog();
+    } else {
+        OnEmulationStopped();
     }
 }
 
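Why the else branch matters: when OnShutdownBegin() declines (the emulation thread is already gone or never started), no shutdown signal will ever arrive, so the cleanup in OnEmulationStopped() used to be skipped and the window stayed in its shutting-down state. Calling it directly closes that gap, and the null check added to OnEmulationStopped() above makes the direct call safe. The resulting control flow, annotated:

    if (OnShutdownBegin()) {
        OnShutdownBeginDialog(); // async path: OnEmulationStopped() fires later
    } else {
        OnEmulationStopped();    // sync path: clean up immediately
    }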
@@ -3708,15 +3730,36 @@ void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_vie
     }
 }
 
+std::string GMainWindow::CreateTASFramesString(
+    std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames) const {
+    std::string string = "";
+    size_t maxPlayerIndex = 0;
+    for (size_t i = 0; i < frames.size(); i++) {
+        if (frames[i] != 0) {
+            if (maxPlayerIndex != 0)
+                string += ", ";
+            while (maxPlayerIndex++ != i)
+                string += "0, ";
+            string += std::to_string(frames[i]);
+        }
+    }
+    return string;
+}
+
 QString GMainWindow::GetTasStateDescription() const {
     auto [tas_status, current_tas_frame, total_tas_frames] = input_subsystem->GetTas()->GetStatus();
+    std::string tas_frames_string = CreateTASFramesString(total_tas_frames);
     switch (tas_status) {
     case InputCommon::TasInput::TasState::Running:
-        return tr("TAS state: Running %1/%2").arg(current_tas_frame).arg(total_tas_frames);
+        return tr("TAS state: Running %1/%2")
+            .arg(current_tas_frame)
+            .arg(QString::fromStdString(tas_frames_string));
     case InputCommon::TasInput::TasState::Recording:
-        return tr("TAS state: Recording %1").arg(total_tas_frames);
+        return tr("TAS state: Recording %1").arg(total_tas_frames[0]);
     case InputCommon::TasInput::TasState::Stopped:
-        return tr("TAS state: Idle %1/%2").arg(current_tas_frame).arg(total_tas_frames);
+        return tr("TAS state: Idle %1/%2")
+            .arg(current_tas_frame)
+            .arg(QString::fromStdString(tas_frames_string));
     default:
         return tr("TAS State: Invalid");
     }
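CreateTASFramesString turns the per-player frame counts into a comma-separated list: zeros are emitted only to pad gaps before a later active player, and trailing inactive players are dropped entirely. A worked example (the array size comes from PLAYER_NUMBER):

    std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames{};
    frames[0] = 120;
    frames[2] = 300;
    // CreateTASFramesString(frames) returns "120, 0, 300"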
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index db318485d..0f61abc7a 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -12,6 +12,7 @@
 
 #include "common/announce_multiplayer_room.h"
 #include "common/common_types.h"
+#include "input_common/drivers/tas_input.h"
 #include "yuzu/compatibility_list.h"
 #include "yuzu/hotkeys.h"
 
@@ -266,6 +267,9 @@ private:
     void changeEvent(QEvent* event) override;
     void closeEvent(QCloseEvent* event) override;
 
+    std::string CreateTASFramesString(
+        std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames) const;
+
 #ifdef __unix__
     void SetupSigInterrupts();
     static void HandleSigInterrupt(int);
@@ -347,6 +351,7 @@ private:
     void RemoveUpdateContent(u64 program_id, InstalledEntryType type);
     void RemoveAddOnContent(u64 program_id, InstalledEntryType type);
     void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target);
+    void RemoveVulkanDriverPipelineCache(u64 program_id);
     void RemoveAllTransferableShaderCaches(u64 program_id);
     void RemoveCustomConfiguration(u64 program_id, const std::string& game_path);
     std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 1e45e57bc..527017282 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -296,6 +296,7 @@ void Config::ReadValues() {
 
     // Renderer
     ReadSetting("Renderer", Settings::values.renderer_backend);
+    ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
     ReadSetting("Renderer", Settings::values.renderer_debug);
     ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
     ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);
@@ -321,6 +322,7 @@ void Config::ReadValues() {
321 ReadSetting("Renderer", Settings::values.accelerate_astc); 322 ReadSetting("Renderer", Settings::values.accelerate_astc);
322 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 323 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
323 ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); 324 ReadSetting("Renderer", Settings::values.use_pessimistic_flushes);
325 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
324 326
325 ReadSetting("Renderer", Settings::values.bg_red); 327 ReadSetting("Renderer", Settings::values.bg_red);
326 ReadSetting("Renderer", Settings::values.bg_green); 328 ReadSetting("Renderer", Settings::values.bg_green);