summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/input.h3
-rw-r--r--src/common/settings.cpp1
-rw-r--r--src/common/settings.h1
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp4
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp4
-rw-r--r--src/core/hid/emulated_controller.cpp10
-rw-r--r--src/core/internal_network/network.cpp6
-rw-r--r--src/input_common/helpers/stick_from_buttons.cpp17
-rw-r--r--src/input_common/main.cpp26
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp8
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp58
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp45
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp116
-rw-r--r--src/shader_recompiler/host_translate_info.h3
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp13
-rw-r--r--src/shader_recompiler/ir_opt/passes.h2
-rw-r--r--src/shader_recompiler/profile.h2
-rw-r--r--src/shader_recompiler/shader_info.h12
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h13
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/vulkan_turbo_mode.comp29
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp8
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp27
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.cpp222
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.h35
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp1389
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h443
-rw-r--r--src/yuzu/Info.plist2
-rw-r--r--src/yuzu/configuration/config.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp11
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu/main.cpp10
-rw-r--r--src/yuzu_cmd/config.cpp1
46 files changed, 1292 insertions, 1289 deletions
diff --git a/src/common/input.h b/src/common/input.h
index fc14fd7bf..d27b1d772 100644
--- a/src/common/input.h
+++ b/src/common/input.h
@@ -292,9 +292,6 @@ class InputDevice {
292public: 292public:
293 virtual ~InputDevice() = default; 293 virtual ~InputDevice() = default;
294 294
295 // Request input device to update if necessary
296 virtual void SoftUpdate() {}
297
298 // Force input device to update data regardless of the current state 295 // Force input device to update data regardless of the current state
299 virtual void ForceUpdate() {} 296 virtual void ForceUpdate() {}
300 297
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 2eaded242..1638b79f5 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
185 // Renderer 185 // Renderer
186 values.fsr_sharpening_slider.SetGlobal(true); 186 values.fsr_sharpening_slider.SetGlobal(true);
187 values.renderer_backend.SetGlobal(true); 187 values.renderer_backend.SetGlobal(true);
188 values.renderer_force_max_clock.SetGlobal(true);
188 values.vulkan_device.SetGlobal(true); 189 values.vulkan_device.SetGlobal(true);
189 values.aspect_ratio.SetGlobal(true); 190 values.aspect_ratio.SetGlobal(true);
190 values.max_anisotropy.SetGlobal(true); 191 values.max_anisotropy.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index d9e82087d..a457e3f23 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -415,6 +415,7 @@ struct Values {
415 // Renderer 415 // Renderer
416 SwitchableSetting<RendererBackend, true> renderer_backend{ 416 SwitchableSetting<RendererBackend, true> renderer_backend{
417 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; 417 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
418 SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
418 Setting<bool> renderer_debug{false, "debug"}; 419 Setting<bool> renderer_debug{false, "debug"};
419 Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; 420 Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
420 Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; 421 Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 947747d36..2a7570073 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -229,7 +229,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
229 config.enable_cycle_counting = true; 229 config.enable_cycle_counting = true;
230 230
231 // Code cache size 231 // Code cache size
232#ifdef ARCHITECTURE_arm64
233 config.code_cache_size = 128_MiB;
234#else
232 config.code_cache_size = 512_MiB; 235 config.code_cache_size = 512_MiB;
236#endif
233 237
234 // Allow memory fault handling to work 238 // Allow memory fault handling to work
235 if (system.DebuggerEnabled()) { 239 if (system.DebuggerEnabled()) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 3df943df7..7229fdc2a 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -288,7 +288,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
288 config.enable_cycle_counting = true; 288 config.enable_cycle_counting = true;
289 289
290 // Code cache size 290 // Code cache size
291#ifdef ARCHITECTURE_arm64
292 config.code_cache_size = 128_MiB;
293#else
291 config.code_cache_size = 512_MiB; 294 config.code_cache_size = 512_MiB;
295#endif
292 296
293 // Allow memory fault handling to work 297 // Allow memory fault handling to work
294 if (system.DebuggerEnabled()) { 298 if (system.DebuggerEnabled()) {
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index 71364c323..7a01f3f4c 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -1434,16 +1434,6 @@ AnalogSticks EmulatedController::GetSticks() const {
1434 return {}; 1434 return {};
1435 } 1435 }
1436 1436
1437 // Some drivers like stick from buttons need constant refreshing
1438 for (auto& device : stick_devices) {
1439 if (!device) {
1440 continue;
1441 }
1442 lock.unlock();
1443 device->SoftUpdate();
1444 lock.lock();
1445 }
1446
1447 return controller.analog_stick_state; 1437 return controller.analog_stick_state;
1448} 1438}
1449 1439
diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp
index 447fbffaa..282ea1ff9 100644
--- a/src/core/internal_network/network.cpp
+++ b/src/core/internal_network/network.cpp
@@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
117 return Errno::NETUNREACH; 117 return Errno::NETUNREACH;
118 case WSAEMSGSIZE: 118 case WSAEMSGSIZE:
119 return Errno::MSGSIZE; 119 return Errno::MSGSIZE;
120 case WSAETIMEDOUT:
121 return Errno::TIMEDOUT;
120 default: 122 default:
121 UNIMPLEMENTED_MSG("Unimplemented errno={}", e); 123 UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
122 return Errno::OTHER; 124 return Errno::OTHER;
@@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
211 return Errno::NETUNREACH; 213 return Errno::NETUNREACH;
212 case EMSGSIZE: 214 case EMSGSIZE:
213 return Errno::MSGSIZE; 215 return Errno::MSGSIZE;
216 case ETIMEDOUT:
217 return Errno::TIMEDOUT;
214 default: 218 default:
215 UNIMPLEMENTED_MSG("Unimplemented errno={}", e); 219 UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
216 return Errno::OTHER; 220 return Errno::OTHER;
@@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
226 int e = errno; 230 int e = errno;
227#endif 231#endif
228 const Errno err = TranslateNativeError(e); 232 const Errno err = TranslateNativeError(e);
229 if (err == Errno::AGAIN) { 233 if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
230 return err; 234 return err;
231 } 235 }
232 LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); 236 LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
diff --git a/src/input_common/helpers/stick_from_buttons.cpp b/src/input_common/helpers/stick_from_buttons.cpp
index 82aa6ac2f..f3a0b3419 100644
--- a/src/input_common/helpers/stick_from_buttons.cpp
+++ b/src/input_common/helpers/stick_from_buttons.cpp
@@ -13,11 +13,11 @@ class Stick final : public Common::Input::InputDevice {
13public: 13public:
14 using Button = std::unique_ptr<Common::Input::InputDevice>; 14 using Button = std::unique_ptr<Common::Input::InputDevice>;
15 15
16 Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, 16 Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, Button updater_,
17 float modifier_scale_, float modifier_angle_) 17 float modifier_scale_, float modifier_angle_)
18 : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)), 18 : up(std::move(up_)), down(std::move(down_)), left(std::move(left_)),
19 right(std::move(right_)), modifier(std::move(modifier_)), modifier_scale(modifier_scale_), 19 right(std::move(right_)), modifier(std::move(modifier_)), updater(std::move(updater_)),
20 modifier_angle(modifier_angle_) { 20 modifier_scale(modifier_scale_), modifier_angle(modifier_angle_) {
21 up->SetCallback({ 21 up->SetCallback({
22 .on_change = 22 .on_change =
23 [this](const Common::Input::CallbackStatus& callback_) { 23 [this](const Common::Input::CallbackStatus& callback_) {
@@ -48,6 +48,9 @@ public:
48 UpdateModButtonStatus(callback_); 48 UpdateModButtonStatus(callback_);
49 }, 49 },
50 }); 50 });
51 updater->SetCallback({
52 .on_change = [this](const Common::Input::CallbackStatus& callback_) { SoftUpdate(); },
53 });
51 last_x_axis_value = 0.0f; 54 last_x_axis_value = 0.0f;
52 last_y_axis_value = 0.0f; 55 last_y_axis_value = 0.0f;
53 } 56 }
@@ -248,7 +251,7 @@ public:
248 modifier->ForceUpdate(); 251 modifier->ForceUpdate();
249 } 252 }
250 253
251 void SoftUpdate() override { 254 void SoftUpdate() {
252 Common::Input::CallbackStatus status{ 255 Common::Input::CallbackStatus status{
253 .type = Common::Input::InputType::Stick, 256 .type = Common::Input::InputType::Stick,
254 .stick_status = GetStatus(), 257 .stick_status = GetStatus(),
@@ -308,6 +311,7 @@ private:
308 Button left; 311 Button left;
309 Button right; 312 Button right;
310 Button modifier; 313 Button modifier;
314 Button updater;
311 float modifier_scale{}; 315 float modifier_scale{};
312 float modifier_angle{}; 316 float modifier_angle{};
313 float angle{}; 317 float angle{};
@@ -331,11 +335,12 @@ std::unique_ptr<Common::Input::InputDevice> StickFromButton::Create(
331 auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine)); 335 auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine));
332 auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine)); 336 auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine));
333 auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine)); 337 auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine));
338 auto updater = Common::Input::CreateInputDeviceFromString("engine:updater,button:0");
334 auto modifier_scale = params.Get("modifier_scale", 0.5f); 339 auto modifier_scale = params.Get("modifier_scale", 0.5f);
335 auto modifier_angle = params.Get("modifier_angle", 5.5f); 340 auto modifier_angle = params.Get("modifier_angle", 5.5f);
336 return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left), 341 return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left),
337 std::move(right), std::move(modifier), modifier_scale, 342 std::move(right), std::move(modifier), std::move(updater),
338 modifier_angle); 343 modifier_scale, modifier_angle);
339} 344}
340 345
341} // namespace InputCommon 346} // namespace InputCommon
diff --git a/src/input_common/main.cpp b/src/input_common/main.cpp
index 4dc92f482..e0b2131ed 100644
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -28,6 +28,28 @@
28 28
29namespace InputCommon { 29namespace InputCommon {
30 30
31/// Dummy engine to get periodic updates
32class UpdateEngine final : public InputEngine {
33public:
34 explicit UpdateEngine(std::string input_engine_) : InputEngine(std::move(input_engine_)) {
35 PreSetController(identifier);
36 }
37
38 void PumpEvents() {
39 SetButton(identifier, 0, last_state);
40 last_state = !last_state;
41 }
42
43private:
44 static constexpr PadIdentifier identifier = {
45 .guid = Common::UUID{},
46 .port = 0,
47 .pad = 0,
48 };
49
50 bool last_state{};
51};
52
31struct InputSubsystem::Impl { 53struct InputSubsystem::Impl {
32 template <typename Engine> 54 template <typename Engine>
33 void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) { 55 void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) {
@@ -45,6 +67,7 @@ struct InputSubsystem::Impl {
45 void Initialize() { 67 void Initialize() {
46 mapping_factory = std::make_shared<MappingFactory>(); 68 mapping_factory = std::make_shared<MappingFactory>();
47 69
70 RegisterEngine("updater", update_engine);
48 RegisterEngine("keyboard", keyboard); 71 RegisterEngine("keyboard", keyboard);
49 RegisterEngine("mouse", mouse); 72 RegisterEngine("mouse", mouse);
50 RegisterEngine("touch", touch_screen); 73 RegisterEngine("touch", touch_screen);
@@ -74,6 +97,7 @@ struct InputSubsystem::Impl {
74 } 97 }
75 98
76 void Shutdown() { 99 void Shutdown() {
100 UnregisterEngine(update_engine);
77 UnregisterEngine(keyboard); 101 UnregisterEngine(keyboard);
78 UnregisterEngine(mouse); 102 UnregisterEngine(mouse);
79 UnregisterEngine(touch_screen); 103 UnregisterEngine(touch_screen);
@@ -252,6 +276,7 @@ struct InputSubsystem::Impl {
252 } 276 }
253 277
254 void PumpEvents() const { 278 void PumpEvents() const {
279 update_engine->PumpEvents();
255#ifdef HAVE_SDL2 280#ifdef HAVE_SDL2
256 sdl->PumpEvents(); 281 sdl->PumpEvents();
257#endif 282#endif
@@ -263,6 +288,7 @@ struct InputSubsystem::Impl {
263 288
264 std::shared_ptr<MappingFactory> mapping_factory; 289 std::shared_ptr<MappingFactory> mapping_factory;
265 290
291 std::shared_ptr<UpdateEngine> update_engine;
266 std::shared_ptr<Keyboard> keyboard; 292 std::shared_ptr<Keyboard> keyboard;
267 std::shared_ptr<Mouse> mouse; 293 std::shared_ptr<Mouse> mouse;
268 std::shared_ptr<TouchScreen> touch_screen; 294 std::shared_ptr<TouchScreen> touch_screen;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index db9c94ce8..0cd87a48f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
321 case IR::Attribute::PositionY: 321 case IR::Attribute::PositionY:
322 case IR::Attribute::PositionZ: 322 case IR::Attribute::PositionZ:
323 case IR::Attribute::PositionW: 323 case IR::Attribute::PositionW:
324 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, 324 return ctx.OpLoad(
325 ctx.Const(element))); 325 ctx.F32[1],
326 ctx.need_input_position_indirect
327 ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
328 ctx.Const(element))
329 : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
326 case IR::Attribute::InstanceId: 330 case IR::Attribute::InstanceId:
327 if (ctx.profile.support_vertex_instance_id) { 331 if (ctx.profile.support_vertex_instance_id) {
328 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); 332 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 2c90f2368..c5db19d09 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); 58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
59} 59}
60 60
61Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { 61Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
62 const Id thirty_two{ctx.Const(32u)}; 62 const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
63 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; 63 const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
64 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; 64 return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
65 return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
66} 65}
67} // Anonymous namespace 66} // Anonymous namespace
68 67
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
145Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 144Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
146 Id segmentation_mask) { 145 Id segmentation_mask) {
147 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; 146 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
148 const Id thread_id{GetThreadId(ctx)}; 147 const Id thread_id{EmitLaneId(ctx)};
149 if (ctx.profile.warp_size_potentially_larger_than_guest) {
150 const Id thirty_two{ctx.Const(32u)};
151 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
152 const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
153 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
154 index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
155 clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
156 }
157 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; 148 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
158 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; 149 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
159 150
160 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; 151 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
161 const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; 152 Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
162 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 153 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
163 154
155 if (ctx.profile.warp_size_potentially_larger_than_guest) {
156 src_thread_id = AddPartitionBase(ctx, src_thread_id);
157 }
158
164 SetInBoundsFlag(inst, in_range); 159 SetInBoundsFlag(inst, in_range);
165 return SelectValue(ctx, in_range, value, src_thread_id); 160 return SelectValue(ctx, in_range, value, src_thread_id);
166} 161}
167 162
168Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 163Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
169 Id segmentation_mask) { 164 Id segmentation_mask) {
170 const Id thread_id{GetThreadId(ctx)}; 165 const Id thread_id{EmitLaneId(ctx)};
171 if (ctx.profile.warp_size_potentially_larger_than_guest) {
172 clamp = GetUpperClamp(ctx, thread_id, clamp);
173 }
174 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 166 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
175 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; 167 Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
176 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 168 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
177 169
170 if (ctx.profile.warp_size_potentially_larger_than_guest) {
171 src_thread_id = AddPartitionBase(ctx, src_thread_id);
172 }
173
178 SetInBoundsFlag(inst, in_range); 174 SetInBoundsFlag(inst, in_range);
179 return SelectValue(ctx, in_range, value, src_thread_id); 175 return SelectValue(ctx, in_range, value, src_thread_id);
180} 176}
181 177
182Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 178Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
183 Id segmentation_mask) { 179 Id segmentation_mask) {
184 const Id thread_id{GetThreadId(ctx)}; 180 const Id thread_id{EmitLaneId(ctx)};
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 clamp = GetUpperClamp(ctx, thread_id, clamp);
187 }
188 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 181 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
189 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; 182 Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
190 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 183 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
191 184
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 src_thread_id = AddPartitionBase(ctx, src_thread_id);
187 }
188
192 SetInBoundsFlag(inst, in_range); 189 SetInBoundsFlag(inst, in_range);
193 return SelectValue(ctx, in_range, value, src_thread_id); 190 return SelectValue(ctx, in_range, value, src_thread_id);
194} 191}
195 192
196Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 193Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
197 Id segmentation_mask) { 194 Id segmentation_mask) {
198 const Id thread_id{GetThreadId(ctx)}; 195 const Id thread_id{EmitLaneId(ctx)};
199 if (ctx.profile.warp_size_potentially_larger_than_guest) {
200 clamp = GetUpperClamp(ctx, thread_id, clamp);
201 }
202 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 196 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
203 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; 197 Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
204 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 198 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
205 199
200 if (ctx.profile.warp_size_potentially_larger_than_guest) {
201 src_thread_id = AddPartitionBase(ctx, src_thread_id);
202 }
203
206 SetInBoundsFlag(inst, in_range); 204 SetInBoundsFlag(inst, in_range);
207 return SelectValue(ctx, in_range, value, src_thread_id); 205 return SelectValue(ctx, in_range, value, src_thread_id);
208} 206}
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index ecb2db494..a0c155fdb 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
544 U16 = Name(TypeInt(16, false), "u16"); 544 U16 = Name(TypeInt(16, false), "u16");
545 S16 = Name(TypeInt(16, true), "s16"); 545 S16 = Name(TypeInt(16, true), "s16");
546 } 546 }
547 if (info.uses_int64) { 547 if (info.uses_int64 && profile.support_int64) {
548 AddCapability(spv::Capability::Int64); 548 AddCapability(spv::Capability::Int64);
549 U64 = Name(TypeInt(64, false), "u64"); 549 U64 = Name(TypeInt(64, false), "u64");
550 } 550 }
@@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
721 size_t label_index{0}; 721 size_t label_index{0};
722 if (info.loads.AnyComponent(IR::Attribute::PositionX)) { 722 if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
723 AddLabel(labels[label_index]); 723 AddLabel(labels[label_index]);
724 const Id pointer{is_array 724 const Id pointer{[&]() {
725 ? OpAccessChain(input_f32, input_position, vertex, masked_index) 725 if (need_input_position_indirect) {
726 : OpAccessChain(input_f32, input_position, masked_index)}; 726 if (is_array)
727 return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
728 masked_index);
729 else
730 return OpAccessChain(input_f32, input_position, u32_zero_value,
731 masked_index);
732 } else {
733 if (is_array)
734 return OpAccessChain(input_f32, input_position, vertex, masked_index);
735 else
736 return OpAccessChain(input_f32, input_position, masked_index);
737 }
738 }()};
727 const Id result{OpLoad(F32[1], pointer)}; 739 const Id result{OpLoad(F32[1], pointer)};
728 OpReturnValue(result); 740 OpReturnValue(result);
729 ++label_index; 741 ++label_index;
@@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1367 Decorate(layer, spv::Decoration::Flat); 1379 Decorate(layer, spv::Decoration::Flat);
1368 } 1380 }
1369 if (loads.AnyComponent(IR::Attribute::PositionX)) { 1381 if (loads.AnyComponent(IR::Attribute::PositionX)) {
1370 const bool is_fragment{stage != Stage::Fragment}; 1382 const bool is_fragment{stage == Stage::Fragment};
1371 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; 1383 if (!is_fragment && profile.has_broken_spirv_position_input) {
1372 input_position = DefineInput(*this, F32[4], true, built_in); 1384 need_input_position_indirect = true;
1373 if (profile.support_geometry_shader_passthrough) { 1385
1374 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { 1386 const Id input_position_struct = TypeStruct(F32[4]);
1375 Decorate(input_position, spv::Decoration::PassthroughNV); 1387 input_position = DefineInput(*this, input_position_struct, true);
1388
1389 MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
1390 static_cast<unsigned>(spv::BuiltIn::Position));
1391 Decorate(input_position_struct, spv::Decoration::Block);
1392 } else {
1393 const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
1394 : spv::BuiltIn::Position};
1395 input_position = DefineInput(*this, F32[4], true, built_in);
1396
1397 if (profile.support_geometry_shader_passthrough) {
1398 if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
1399 Decorate(input_position, spv::Decoration::PassthroughNV);
1400 }
1376 } 1401 }
1377 } 1402 }
1378 } 1403 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 4414a5169..dbc5c55b9 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -280,6 +280,7 @@ public:
280 Id write_global_func_u32x2{}; 280 Id write_global_func_u32x2{};
281 Id write_global_func_u32x4{}; 281 Id write_global_func_u32x4{};
282 282
283 bool need_input_position_indirect{};
283 Id input_position{}; 284 Id input_position{};
284 std::array<Id, 32> input_generics{}; 285 std::array<Id, 32> input_generics{};
285 286
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index ac159d24b..a42453e90 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
171 } 171 }
172 return mapping; 172 return mapping;
173} 173}
174
175void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
176 const Shader::VaryingState& passthrough_mask,
177 bool passthrough_position,
178 std::optional<IR::Attribute> passthrough_layer_attr) {
179 for (u32 i = 0; i < program.output_vertices; i++) {
180 // Assign generics from input
181 for (u32 j = 0; j < 32; j++) {
182 if (!passthrough_mask.Generic(j)) {
183 continue;
184 }
185
186 const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
187 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
188 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
189 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
190 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
191 }
192
193 if (passthrough_position) {
194 // Assign position from input
195 const IR::Attribute attr = IR::Attribute::PositionX;
196 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
197 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
198 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
199 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
200 }
201
202 if (passthrough_layer_attr) {
203 // Assign layer
204 ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
205 ir.Imm32(0));
206 }
207
208 // Emit vertex
209 ir.EmitVertex(ir.Imm32(0));
210 }
211 ir.EndPrimitive(ir.Imm32(0));
212}
213
214u32 GetOutputTopologyVertices(OutputTopology output_topology) {
215 switch (output_topology) {
216 case OutputTopology::PointList:
217 return 1;
218 case OutputTopology::LineStrip:
219 return 2;
220 default:
221 return 3;
222 }
223}
224
225void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
226 for (IR::Block* const block : program.blocks) {
227 for (IR::Inst& inst : block->Instructions()) {
228 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
229 IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
230 EmitGeometryPassthrough(
231 ir, program, program.info.passthrough,
232 program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
233 }
234 }
235 }
236}
237
174} // Anonymous namespace 238} // Anonymous namespace
175 239
176IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 240IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
195 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; 259 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
196 if (program.is_geometry_passthrough) { 260 if (program.is_geometry_passthrough) {
197 const auto& mask{env.GpPassthroughMask()}; 261 const auto& mask{env.GpPassthroughMask()};
198 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { 262 for (size_t i = 0; i < mask.size() * 32; ++i) {
199 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; 263 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
200 } 264 }
265
266 if (!host_info.support_geometry_shader_passthrough) {
267 program.output_vertices = GetOutputTopologyVertices(program.output_topology);
268 LowerGeometryPassthrough(program, host_info);
269 }
201 } 270 }
202 break; 271 break;
203 } 272 }
@@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
223 292
224 Optimization::PositionPass(env, program); 293 Optimization::PositionPass(env, program);
225 294
226 Optimization::GlobalMemoryToStorageBufferPass(program); 295 Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
227 Optimization::TexturePass(env, program, host_info); 296 Optimization::TexturePass(env, program, host_info);
228 297
229 if (Settings::values.resolution_info.active) { 298 if (Settings::values.resolution_info.active) {
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
342 IR::Program program; 411 IR::Program program;
343 program.stage = Stage::Geometry; 412 program.stage = Stage::Geometry;
344 program.output_topology = output_topology; 413 program.output_topology = output_topology;
345 switch (output_topology) { 414 program.output_vertices = GetOutputTopologyVertices(output_topology);
346 case OutputTopology::PointList:
347 program.output_vertices = 1;
348 break;
349 case OutputTopology::LineStrip:
350 program.output_vertices = 2;
351 break;
352 default:
353 program.output_vertices = 3;
354 break;
355 }
356 415
357 program.is_geometry_passthrough = false; 416 program.is_geometry_passthrough = false;
358 program.info.loads.mask = source_program.info.stores.mask; 417 program.info.loads.mask = source_program.info.stores.mask;
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
366 node.data.block = current_block; 425 node.data.block = current_block;
367 426
368 IR::IREmitter ir{*current_block}; 427 IR::IREmitter ir{*current_block};
369 for (u32 i = 0; i < program.output_vertices; i++) { 428 EmitGeometryPassthrough(ir, program, program.info.stores, true,
370 // Assign generics from input 429 source_program.info.emulated_layer);
371 for (u32 j = 0; j < 32; j++) {
372 if (!program.info.stores.Generic(j)) {
373 continue;
374 }
375
376 const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
377 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
378 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
379 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
380 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
381 }
382
383 // Assign position from input
384 const IR::Attribute attr = IR::Attribute::PositionX;
385 ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
386 ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
387 ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
388 ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
389
390 // Assign layer
391 ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
392 ir.Imm32(0));
393
394 // Emit vertex
395 ir.EmitVertex(ir.Imm32(0));
396 }
397 ir.EndPrimitive(ir.Imm32(0));
398 430
399 IR::Block* return_block{block_pool.Create(inst_pool)}; 431 IR::Block* return_block{block_pool.Create(inst_pool)};
400 IR::IREmitter{*return_block}.Epilogue(); 432 IR::IREmitter{*return_block}.Epilogue();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index d5d279554..55fc48768 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -15,6 +15,9 @@ struct HostTranslateInfo {
15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered 15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers 16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS 17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
18 u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
19 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
20 ///< passthrough shaders
18}; 21};
19 22
20} // namespace Shader 23} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 336338e62..9101722ba 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
11#include "shader_recompiler/frontend/ir/breadth_first_search.h" 11#include "shader_recompiler/frontend/ir/breadth_first_search.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/host_translate_info.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
16namespace Shader::Optimization { 17namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
402} 403}
403 404
404/// Returns the offset in indices (not bytes) for an equivalent storage instruction 405/// Returns the offset in indices (not bytes) for an equivalent storage instruction
405IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { 406IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
406 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; 407 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
407 IR::U32 offset; 408 IR::U32 offset;
408 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { 409 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
415 } 416 }
416 // Subtract the least significant 32 bits from the guest offset. The result is the storage 417 // Subtract the least significant 32 bits from the guest offset. The result is the storage
417 // buffer offset in bytes. 418 // buffer offset in bytes.
418 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; 419 IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
420
421 // Align the offset base to match the host alignment requirements
422 low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
419 return ir.ISub(offset, low_cbuf); 423 return ir.ISub(offset, low_cbuf);
420} 424}
421 425
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
510} 514}
511} // Anonymous namespace 515} // Anonymous namespace
512 516
513void GlobalMemoryToStorageBufferPass(IR::Program& program) { 517void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
514 StorageInfo info; 518 StorageInfo info;
515 for (IR::Block* const block : program.post_order_blocks) { 519 for (IR::Block* const block : program.post_order_blocks) {
516 for (IR::Inst& inst : block->Instructions()) { 520 for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
534 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; 538 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
535 IR::Block* const block{storage_inst.block}; 539 IR::Block* const block{storage_inst.block};
536 IR::Inst* const inst{storage_inst.inst}; 540 IR::Inst* const inst{storage_inst.inst};
537 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; 541 const IR::U32 offset{
542 StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
538 Replace(*block, *inst, index, offset); 543 Replace(*block, *inst, index, offset);
539 } 544 }
540} 545}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 1f8f2ba95..4ffad1172 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -15,7 +15,7 @@ namespace Shader::Optimization {
15void CollectShaderInfoPass(Environment& env, IR::Program& program); 15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(Environment& env, IR::Program& program); 16void ConstantPropagationPass(Environment& env, IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program); 17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program); 18void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
19void IdentityRemovalPass(IR::Program& program); 19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program); 20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program); 21void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index b8841a536..253e0d0bd 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -55,6 +55,8 @@ struct Profile {
55 55
56 /// OpFClamp is broken and OpFMax + OpFMin should be used instead 56 /// OpFClamp is broken and OpFMax + OpFMin should be used instead
57 bool has_broken_spirv_clamp{}; 57 bool has_broken_spirv_clamp{};
58 /// The Position builtin needs to be wrapped in a struct when used as an input
59 bool has_broken_spirv_position_input{};
58 /// Offset image operands with an unsigned type do not work 60 /// Offset image operands with an unsigned type do not work
59 bool has_broken_unsigned_image_offsets{}; 61 bool has_broken_unsigned_image_offsets{};
60 /// Signed instructions with unsigned data types are misinterpreted 62 /// Signed instructions with unsigned data types are misinterpreted
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 44236b6b1..f93181e1e 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -65,6 +65,8 @@ enum class Interpolation {
65struct ConstantBufferDescriptor { 65struct ConstantBufferDescriptor {
66 u32 index; 66 u32 index;
67 u32 count; 67 u32 count;
68
69 auto operator<=>(const ConstantBufferDescriptor&) const = default;
68}; 70};
69 71
70struct StorageBufferDescriptor { 72struct StorageBufferDescriptor {
@@ -72,6 +74,8 @@ struct StorageBufferDescriptor {
72 u32 cbuf_offset; 74 u32 cbuf_offset;
73 u32 count; 75 u32 count;
74 bool is_written; 76 bool is_written;
77
78 auto operator<=>(const StorageBufferDescriptor&) const = default;
75}; 79};
76 80
77struct TextureBufferDescriptor { 81struct TextureBufferDescriptor {
@@ -84,6 +88,8 @@ struct TextureBufferDescriptor {
84 u32 secondary_shift_left; 88 u32 secondary_shift_left;
85 u32 count; 89 u32 count;
86 u32 size_shift; 90 u32 size_shift;
91
92 auto operator<=>(const TextureBufferDescriptor&) const = default;
87}; 93};
88using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; 94using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
89 95
@@ -95,6 +101,8 @@ struct ImageBufferDescriptor {
95 u32 cbuf_offset; 101 u32 cbuf_offset;
96 u32 count; 102 u32 count;
97 u32 size_shift; 103 u32 size_shift;
104
105 auto operator<=>(const ImageBufferDescriptor&) const = default;
98}; 106};
99using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; 107using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
100 108
@@ -110,6 +118,8 @@ struct TextureDescriptor {
110 u32 secondary_shift_left; 118 u32 secondary_shift_left;
111 u32 count; 119 u32 count;
112 u32 size_shift; 120 u32 size_shift;
121
122 auto operator<=>(const TextureDescriptor&) const = default;
113}; 123};
114using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; 124using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
115 125
@@ -122,6 +132,8 @@ struct ImageDescriptor {
122 u32 cbuf_offset; 132 u32 cbuf_offset;
123 u32 count; 133 u32 count;
124 u32 size_shift; 134 u32 size_shift;
135
136 auto operator<=>(const ImageDescriptor&) const = default;
125}; 137};
126using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; 138using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
127 139
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index b7095ae13..f617665de 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -191,6 +191,8 @@ add_library(video_core STATIC
191 renderer_vulkan/vk_texture_cache.cpp 191 renderer_vulkan/vk_texture_cache.cpp
192 renderer_vulkan/vk_texture_cache.h 192 renderer_vulkan/vk_texture_cache.h
193 renderer_vulkan/vk_texture_cache_base.cpp 193 renderer_vulkan/vk_texture_cache_base.cpp
194 renderer_vulkan/vk_turbo_mode.cpp
195 renderer_vulkan/vk_turbo_mode.h
194 renderer_vulkan/vk_update_descriptor.cpp 196 renderer_vulkan/vk_update_descriptor.cpp
195 renderer_vulkan/vk_update_descriptor.h 197 renderer_vulkan/vk_update_descriptor.h
196 shader_cache.cpp 198 shader_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 06fd40851..627917ab6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1938 bool is_written) const { 1938 bool is_written) const {
1939 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); 1939 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
1940 const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); 1940 const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
1941 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1941 const u32 alignment = runtime.GetStorageBufferAlignment();
1942
1943 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1944 const u32 aligned_size =
1945 Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
1946
1947 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1942 if (!cpu_addr || size == 0) { 1948 if (!cpu_addr || size == 0) {
1943 return NULL_BINDING; 1949 return NULL_BINDING;
1944 } 1950 }
1945 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1951
1952 const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
1946 const Binding binding{ 1953 const Binding binding{
1947 .cpu_addr = *cpu_addr, 1954 .cpu_addr = *cpu_addr,
1948 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), 1955 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
1949 .buffer_id = BufferId{}, 1956 .buffer_id = BufferId{},
1950 }; 1957 };
1951 return binding; 1958 return binding;
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index e6dc24f22..f275b2aa9 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -47,6 +47,7 @@ set(SHADER_FILES
47 vulkan_present_scaleforce_fp16.frag 47 vulkan_present_scaleforce_fp16.frag
48 vulkan_present_scaleforce_fp32.frag 48 vulkan_present_scaleforce_fp32.frag
49 vulkan_quad_indexed.comp 49 vulkan_quad_indexed.comp
50 vulkan_turbo_mode.comp
50 vulkan_uint8.comp 51 vulkan_uint8.comp
51) 52)
52 53
diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp
new file mode 100644
index 000000000..d651001d9
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp
@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#version 460 core

layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in;

layout (binding = 0) buffer ThreadData {
    uint data[];
};

// One step of Marsaglia's xorshift32 PRNG; spreads invocation indices so the
// atomic traffic below hits pseudo-random buffer locations.
uint xorshift32(uint x) {
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return x;
}

// Linearizes the 2D invocation ID into a flat index.
// The row stride must be the total invocation count along X
// (gl_WorkGroupSize.x * gl_NumWorkGroups.x); the previous code multiplied the
// Y extents instead, which maps distinct invocations to the same index.
uint getGlobalIndex() {
    return gl_GlobalInvocationID.x +
           gl_GlobalInvocationID.y * gl_WorkGroupSize.x * gl_NumWorkGroups.x;
}

// Busy-work kernel used by "turbo mode": issues dependent atomic reads and
// writes on the buffer to keep the GPU occupied. The values produced are
// irrelevant; only the sustained memory/ALU traffic matters.
void main() {
    uint myIndex = xorshift32(getGlobalIndex());
    uint otherIndex = xorshift32(myIndex);

    // atomicAdd(..., 0) is an atomic read of another slot; the +1 makes the
    // following write data-dependent on it, preventing elimination.
    uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1;
    atomicAdd(data[myIndex % data.length()], otherValue);
}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..bb1962073 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -160,6 +160,10 @@ public:
160 return device.CanReportMemoryUsage(); 160 return device.CanReportMemoryUsage();
161 } 161 }
162 162
163 u32 GetStorageBufferAlignment() const {
164 return static_cast<u32>(device.GetShaderStorageBufferAlignment());
165 }
166
163private: 167private:
164 static constexpr std::array PABO_LUT{ 168 static constexpr std::array PABO_LUT{
165 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 169 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7d48af8e1..181857d9c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -139,6 +139,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
139void RasterizerOpenGL::Clear(u32 layer_count) { 139void RasterizerOpenGL::Clear(u32 layer_count) {
140 MICROPROFILE_SCOPE(OpenGL_Clears); 140 MICROPROFILE_SCOPE(OpenGL_Clears);
141 141
142 gpu_memory->FlushCaching();
142 const auto& regs = maxwell3d->regs; 143 const auto& regs = maxwell3d->regs;
143 bool use_color{}; 144 bool use_color{};
144 bool use_depth{}; 145 bool use_depth{};
@@ -207,6 +208,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
207 MICROPROFILE_SCOPE(OpenGL_Drawing); 208 MICROPROFILE_SCOPE(OpenGL_Drawing);
208 209
209 SCOPE_EXIT({ gpu.TickWork(); }); 210 SCOPE_EXIT({ gpu.TickWork(); });
211 gpu_memory->FlushCaching();
210 query_cache.UpdateCounters(); 212 query_cache.UpdateCounters();
211 213
212 GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; 214 GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
@@ -319,6 +321,7 @@ void RasterizerOpenGL::DrawIndirect() {
319} 321}
320 322
321void RasterizerOpenGL::DispatchCompute() { 323void RasterizerOpenGL::DispatchCompute() {
324 gpu_memory->FlushCaching();
322 ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; 325 ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
323 if (!pipeline) { 326 if (!pipeline) {
324 return; 327 return;
@@ -526,6 +529,7 @@ void RasterizerOpenGL::TickFrame() {
526} 529}
527 530
528bool RasterizerOpenGL::AccelerateConditionalRendering() { 531bool RasterizerOpenGL::AccelerateConditionalRendering() {
532 gpu_memory->FlushCaching();
529 if (Settings::IsGPULevelHigh()) { 533 if (Settings::IsGPULevelHigh()) {
530 // Reimplement Host conditional rendering. 534 // Reimplement Host conditional rendering.
531 return false; 535 return false;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 03b6314ff..7dd854e0f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
236 .needs_demote_reorder = device.IsAmd(), 236 .needs_demote_reorder = device.IsAmd(),
237 .support_snorm_render_buffer = false, 237 .support_snorm_render_buffer = false,
238 .support_viewport_index_layer = device.HasVertexViewportLayer(), 238 .support_viewport_index_layer = device.HasVertexViewportLayer(),
239 .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
240 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
239 } { 241 } {
240 if (use_asynchronous_shaders) { 242 if (use_asynchronous_shaders) {
241 workers = CreateWorkers(); 243 workers = CreateWorkers();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bc75680f0..de95f2634 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -442,7 +442,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
442 442
443 glBindTextureUnit(0, screen_info.display_texture); 443 glBindTextureUnit(0, screen_info.display_texture);
444 444
445 const auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); 445 auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
446 if (anti_aliasing > Settings::AntiAliasing::LastAA) {
447 LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing);
448 anti_aliasing = Settings::AntiAliasing::None;
449 Settings::values.anti_aliasing.SetValue(anti_aliasing);
450 }
451
446 if (anti_aliasing != Settings::AntiAliasing::None) { 452 if (anti_aliasing != Settings::AntiAliasing::None) {
447 glEnablei(GL_SCISSOR_TEST, 0); 453 glEnablei(GL_SCISSOR_TEST, 0);
448 auto viewport_width = screen_info.texture.width; 454 auto viewport_width = screen_info.texture.width;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index bf97d25a4..2a8d9e377 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -60,24 +60,13 @@ std::string GetDriverVersion(const Device& device) {
60 return GetReadableVersion(version); 60 return GetReadableVersion(version);
61} 61}
62 62
63std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) { 63std::string BuildCommaSeparatedExtensions(
64 std::sort(std::begin(available_extensions), std::end(available_extensions)); 64 const std::set<std::string, std::less<>>& available_extensions) {
65 65 return fmt::format("{}", fmt::join(available_extensions, ","));
66 static constexpr std::size_t AverageExtensionSize = 64;
67 std::string separated_extensions;
68 separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
69
70 const auto end = std::end(available_extensions);
71 for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
72 if (const bool is_last = extension + 1 == end; is_last) {
73 separated_extensions += *extension;
74 } else {
75 separated_extensions += fmt::format("{},", *extension);
76 }
77 }
78 return separated_extensions;
79} 66}
80 67
68} // Anonymous namespace
69
81Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, 70Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
82 VkSurfaceKHR surface) { 71 VkSurfaceKHR surface) {
83 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); 72 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
@@ -89,7 +78,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
89 const vk::PhysicalDevice physical_device(devices[device_index], dld); 78 const vk::PhysicalDevice physical_device(devices[device_index], dld);
90 return Device(*instance, physical_device, surface, dld); 79 return Device(*instance, physical_device, surface, dld);
91} 80}
92} // Anonymous namespace
93 81
94RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 82RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
95 Core::Frontend::EmuWindow& emu_window, 83 Core::Frontend::EmuWindow& emu_window,
@@ -109,6 +97,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
109 screen_info), 97 screen_info),
110 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, 98 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
111 state_tracker, scheduler) { 99 state_tracker, scheduler) {
100 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
101 turbo_mode.emplace(instance, dld);
102 scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
103 }
112 Report(); 104 Report();
113} catch (const vk::Exception& exception) { 105} catch (const vk::Exception& exception) {
114 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); 106 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
@@ -116,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
116} 108}
117 109
118RendererVulkan::~RendererVulkan() { 110RendererVulkan::~RendererVulkan() {
111 scheduler.RegisterOnSubmit([] {});
119 void(device.GetLogical().WaitIdle()); 112 void(device.GetLogical().WaitIdle());
120} 113}
121 114
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index e7bfecb20..009e75e0d 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -13,6 +13,7 @@
13#include "video_core/renderer_vulkan/vk_scheduler.h" 13#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h" 14#include "video_core/renderer_vulkan/vk_state_tracker.h"
15#include "video_core/renderer_vulkan/vk_swapchain.h" 15#include "video_core/renderer_vulkan/vk_swapchain.h"
16#include "video_core/renderer_vulkan/vk_turbo_mode.h"
16#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
17#include "video_core/vulkan_common/vulkan_memory_allocator.h" 18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -31,6 +32,9 @@ class GPU;
31 32
32namespace Vulkan { 33namespace Vulkan {
33 34
35Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
36 VkSurfaceKHR surface);
37
34class RendererVulkan final : public VideoCore::RendererBase { 38class RendererVulkan final : public VideoCore::RendererBase {
35public: 39public:
36 explicit RendererVulkan(Core::TelemetrySession& telemtry_session, 40 explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -74,6 +78,7 @@ private:
74 Swapchain swapchain; 78 Swapchain swapchain;
75 BlitScreen blit_screen; 79 BlitScreen blit_screen;
76 RasterizerVulkan rasterizer; 80 RasterizerVulkan rasterizer;
81 std::optional<TurboMode> turbo_mode;
77}; 82};
78 83
79} // namespace Vulkan 84} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index b0153a502..1cfb4c2ff 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -330,6 +330,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
330 return device.CanReportMemoryUsage(); 330 return device.CanReportMemoryUsage();
331} 331}
332 332
333u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
334 return static_cast<u32>(device.GetStorageBufferAlignment());
335}
336
333void BufferCacheRuntime::Finish() { 337void BufferCacheRuntime::Finish() {
334 scheduler.Finish(); 338 scheduler.Finish();
335} 339}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 183b33632..06539c733 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -73,6 +73,8 @@ public:
73 73
74 bool CanReportMemoryUsage() const; 74 bool CanReportMemoryUsage() const;
75 75
76 u32 GetStorageBufferAlignment() const;
77
76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 78 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
77 79
78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 80 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 67e5bc648..7e69b11d8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -331,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
331 .need_declared_frag_colors = false, 331 .need_declared_frag_colors = false,
332 332
333 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, 333 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
334 .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
334 .has_broken_unsigned_image_offsets = false, 335 .has_broken_unsigned_image_offsets = false,
335 .has_broken_signed_operations = false, 336 .has_broken_signed_operations = false,
336 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, 337 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
@@ -343,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
343 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, 344 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
344 .support_snorm_render_buffer = true, 345 .support_snorm_render_buffer = true,
345 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), 346 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
347 .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
348 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
346 }; 349 };
347 350
348 if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { 351 if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
@@ -790,7 +793,8 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::
790 return create_pipeline_cache(0, nullptr); 793 return create_pipeline_cache(0, nullptr);
791 } 794 }
792 795
793 const size_t cache_size = static_cast<size_t>(end) - magic_number.size(); 796 static constexpr size_t header_size = magic_number.size() + sizeof(cache_version);
797 const size_t cache_size = static_cast<size_t>(end) - header_size;
794 std::vector<char> cache_data(cache_size); 798 std::vector<char> cache_data(cache_size);
795 file.read(cache_data.data(), cache_size); 799 file.read(cache_data.data(), cache_size);
796 800
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index ed4a72166..b75b8eec6 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -270,6 +270,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
270 MICROPROFILE_SCOPE(Vulkan_Clearing); 270 MICROPROFILE_SCOPE(Vulkan_Clearing);
271 271
272 FlushWork(); 272 FlushWork();
273 gpu_memory->FlushCaching();
273 274
274 query_cache.UpdateCounters(); 275 query_cache.UpdateCounters();
275 276
@@ -628,6 +629,7 @@ void RasterizerVulkan::TickFrame() {
628} 629}
629 630
630bool RasterizerVulkan::AccelerateConditionalRendering() { 631bool RasterizerVulkan::AccelerateConditionalRendering() {
632 gpu_memory->FlushCaching();
631 if (Settings::IsGPULevelHigh()) { 633 if (Settings::IsGPULevelHigh()) {
632 // TODO(Blinkhawk): Reimplement Host conditional rendering. 634 // TODO(Blinkhawk): Reimplement Host conditional rendering.
633 return false; 635 return false;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index c2e53a5d5..e03685af1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
213 .signalSemaphoreCount = num_signal_semaphores, 213 .signalSemaphoreCount = num_signal_semaphores,
214 .pSignalSemaphores = signal_semaphores.data(), 214 .pSignalSemaphores = signal_semaphores.data(),
215 }; 215 };
216
217 if (on_submit) {
218 on_submit();
219 }
220
216 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { 221 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
217 case VK_SUCCESS: 222 case VK_SUCCESS:
218 break; 223 break;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3858c506c..bd4cb0f7e 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -5,6 +5,7 @@
5 5
6#include <condition_variable> 6#include <condition_variable>
7#include <cstddef> 7#include <cstddef>
8#include <functional>
8#include <memory> 9#include <memory>
9#include <thread> 10#include <thread>
10#include <utility> 11#include <utility>
@@ -66,6 +67,11 @@ public:
66 query_cache = &query_cache_; 67 query_cache = &query_cache_;
67 } 68 }
68 69
70 // Registers a callback to perform on queue submission.
71 void RegisterOnSubmit(std::function<void()>&& func) {
72 on_submit = std::move(func);
73 }
74
69 /// Send work to a separate thread. 75 /// Send work to a separate thread.
70 template <typename T> 76 template <typename T>
71 void Record(T&& command) { 77 void Record(T&& command) {
@@ -216,6 +222,7 @@ private:
216 vk::CommandBuffer current_cmdbuf; 222 vk::CommandBuffer current_cmdbuf;
217 223
218 std::unique_ptr<CommandChunk> chunk; 224 std::unique_ptr<CommandChunk> chunk;
225 std::function<void()> on_submit;
219 226
220 State state; 227 State state;
221 228
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
new file mode 100644
index 000000000..c42594149
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -0,0 +1,222 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/literals.h"
5#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
6#include "video_core/renderer_vulkan/renderer_vulkan.h"
7#include "video_core/renderer_vulkan/vk_shader_util.h"
8#include "video_core/renderer_vulkan/vk_turbo_mode.h"
9#include "video_core/vulkan_common/vulkan_device.h"
10
11namespace Vulkan {
12
13using namespace Common::Literals;
14
15TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
16 : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
17 {
18 std::scoped_lock lk{m_submission_lock};
19 m_submission_time = std::chrono::steady_clock::now();
20 }
21 m_thread = std::jthread([&](auto stop_token) { Run(stop_token); });
22}
23
24TurboMode::~TurboMode() = default;
25
26void TurboMode::QueueSubmitted() {
27 std::scoped_lock lk{m_submission_lock};
28 m_submission_time = std::chrono::steady_clock::now();
29 m_submission_cv.notify_one();
30}
31
32void TurboMode::Run(std::stop_token stop_token) {
33 auto& dld = m_device.GetLogical();
34
35 // Allocate buffer. 2MiB should be sufficient.
36 auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
37 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
38 .pNext = nullptr,
39 .flags = 0,
40 .size = 2_MiB,
41 .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
42 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
43 .queueFamilyIndexCount = 0,
44 .pQueueFamilyIndices = nullptr,
45 });
46
47 // Commit some device local memory for the buffer.
48 auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
49
50 // Create the descriptor pool to contain our descriptor.
51 constexpr VkDescriptorPoolSize pool_size{
52 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
53 .descriptorCount = 1,
54 };
55
56 auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
57 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
58 .pNext = nullptr,
59 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
60 .maxSets = 1,
61 .poolSizeCount = 1,
62 .pPoolSizes = &pool_size,
63 });
64
65 // Create the descriptor set layout from the pool.
66 constexpr VkDescriptorSetLayoutBinding layout_binding{
67 .binding = 0,
68 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
69 .descriptorCount = 1,
70 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
71 .pImmutableSamplers = nullptr,
72 };
73
74 auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
75 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
76 .pNext = nullptr,
77 .flags = 0,
78 .bindingCount = 1,
79 .pBindings = &layout_binding,
80 });
81
82 // Actually create the descriptor set.
83 auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
84 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
85 .pNext = nullptr,
86 .descriptorPool = *descriptor_pool,
87 .descriptorSetCount = 1,
88 .pSetLayouts = descriptor_set_layout.address(),
89 });
90
91 // Create the shader.
92 auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);
93
94 // Create the pipeline layout.
95 auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
96 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
97 .pNext = nullptr,
98 .flags = 0,
99 .setLayoutCount = 1,
100 .pSetLayouts = descriptor_set_layout.address(),
101 .pushConstantRangeCount = 0,
102 .pPushConstantRanges = nullptr,
103 });
104
105 // Actually create the pipeline.
106 const VkPipelineShaderStageCreateInfo shader_stage{
107 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
108 .pNext = nullptr,
109 .flags = 0,
110 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
111 .module = *shader,
112 .pName = "main",
113 .pSpecializationInfo = nullptr,
114 };
115
116 auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
117 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
118 .pNext = nullptr,
119 .flags = 0,
120 .stage = shader_stage,
121 .layout = *pipeline_layout,
122 .basePipelineHandle = VK_NULL_HANDLE,
123 .basePipelineIndex = 0,
124 });
125
126 // Create a fence to wait on.
127 auto fence = dld.CreateFence(VkFenceCreateInfo{
128 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
129 .pNext = nullptr,
130 .flags = 0,
131 });
132
133 // Create a command pool to allocate a command buffer from.
134 auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
135 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
136 .pNext = nullptr,
137 .flags =
138 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
139 .queueFamilyIndex = m_device.GetGraphicsFamily(),
140 });
141
142 // Create a single command buffer.
143 auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
144 auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
145
146 while (!stop_token.stop_requested()) {
147 // Reset the fence.
148 fence.Reset();
149
150 // Update descriptor set.
151 const VkDescriptorBufferInfo buffer_info{
152 .buffer = *buffer,
153 .offset = 0,
154 .range = VK_WHOLE_SIZE,
155 };
156
157 const VkWriteDescriptorSet buffer_write{
158 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
159 .pNext = nullptr,
160 .dstSet = descriptor_set[0],
161 .dstBinding = 0,
162 .dstArrayElement = 0,
163 .descriptorCount = 1,
164 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
165 .pImageInfo = nullptr,
166 .pBufferInfo = &buffer_info,
167 .pTexelBufferView = nullptr,
168 };
169
170 dld.UpdateDescriptorSets(std::array{buffer_write}, {});
171
172 // Set up the command buffer.
173 cmdbuf.Begin(VkCommandBufferBeginInfo{
174 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
175 .pNext = nullptr,
176 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
177 .pInheritanceInfo = nullptr,
178 });
179
180 // Clear the buffer.
181 cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0);
182
183 // Bind descriptor set.
184 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
185 descriptor_set, {});
186
187 // Bind the pipeline.
188 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
189
190 // Dispatch.
191 cmdbuf.Dispatch(64, 64, 1);
192
193 // Finish.
194 cmdbuf.End();
195
196 const VkSubmitInfo submit_info{
197 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
198 .pNext = nullptr,
199 .waitSemaphoreCount = 0,
200 .pWaitSemaphores = nullptr,
201 .pWaitDstStageMask = nullptr,
202 .commandBufferCount = 1,
203 .pCommandBuffers = cmdbuf.address(),
204 .signalSemaphoreCount = 0,
205 .pSignalSemaphores = nullptr,
206 };
207
208 m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence);
209
210 // Wait for completion.
211 fence.Wait();
212
213 // Wait for the next graphics queue submission if necessary.
214 std::unique_lock lk{m_submission_lock};
215 Common::CondvarWait(m_submission_cv, lk, stop_token, [this] {
216 return (std::chrono::steady_clock::now() - m_submission_time) <=
217 std::chrono::milliseconds{100};
218 });
219 }
220}
221
222} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h
new file mode 100644
index 000000000..99b5ac50b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h
@@ -0,0 +1,35 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <chrono>
7#include <mutex>
8
9#include "common/polyfill_thread.h"
10#include "video_core/vulkan_common/vulkan_device.h"
11#include "video_core/vulkan_common/vulkan_memory_allocator.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13
14namespace Vulkan {
15
/// Runs a small compute workload in a loop on its own logical device while the renderer
/// is actively submitting work, and idles otherwise (presumably to keep the GPU at high
/// clocks -- "turbo"; confirm against the renderer's use of this class).
class TurboMode {
public:
    explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld);
    ~TurboMode();

    /// Called when the renderer submits to the graphics queue; refreshes
    /// m_submission_time and wakes the worker thread.
    void QueueSubmitted();

private:
    /// Worker-thread loop: repeatedly dispatches the compute shader until the stop token
    /// is triggered, sleeping when no submission arrived recently.
    void Run(std::stop_token stop_token);

    Device m_device;             // Dedicated logical device for the background workload
    MemoryAllocator m_allocator; // Supplies device-local memory for the scratch buffer
    std::mutex m_submission_lock;                // Guards m_submission_time
    std::condition_variable_any m_submission_cv; // Signalled by QueueSubmitted()
    std::chrono::time_point<std::chrono::steady_clock> m_submission_time{};

    // Declared last so it is stopped and joined before the members above are destroyed.
    std::jthread m_thread;
};
34
35} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 8e77f5aa3..1458ec4c8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -74,30 +74,6 @@ enum class NvidiaArchitecture {
74 VoltaOrOlder, 74 VoltaOrOlder,
75}; 75};
76 76
77constexpr std::array REQUIRED_EXTENSIONS{
78 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
79 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
80#ifdef _WIN32
81 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
82#endif
83#ifdef __unix__
84 VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
85#endif
86};
87
88constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_2{
89 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
90 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
91 VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
92 VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
93 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
94 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
95};
96
97constexpr std::array REQUIRED_EXTENSIONS_BEFORE_1_3{
98 VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
99};
100
101template <typename T> 77template <typename T>
102void SetNext(void**& next, T& data) { 78void SetNext(void**& next, T& data) {
103 *next = &data; 79 *next = &data;
@@ -286,24 +262,9 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
286 return format_properties; 262 return format_properties;
287} 263}
288 264
289std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
290 const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
291 std::vector<std::string> supported_extensions;
292 supported_extensions.reserve(extensions.size());
293 for (const auto& extension : extensions) {
294 supported_extensions.emplace_back(extension.extensionName);
295 }
296 return supported_extensions;
297}
298
299bool IsExtensionSupported(std::span<const std::string> supported_extensions,
300 std::string_view extension) {
301 return std::ranges::find(supported_extensions, extension) != supported_extensions.end();
302}
303
304NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, 265NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
305 std::span<const std::string> exts) { 266 const std::set<std::string, std::less<>>& exts) {
306 if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { 267 if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
307 VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; 268 VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
308 shading_rate_props.sType = 269 shading_rate_props.sType =
309 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; 270 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
@@ -316,423 +277,39 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
316 return NvidiaArchitecture::AmpereOrNewer; 277 return NvidiaArchitecture::AmpereOrNewer;
317 } 278 }
318 } 279 }
319 if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { 280 if (exts.contains(VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) {
320 return NvidiaArchitecture::Turing; 281 return NvidiaArchitecture::Turing;
321 } 282 }
322 return NvidiaArchitecture::VoltaOrOlder; 283 return NvidiaArchitecture::VoltaOrOlder;
323} 284}
285
286std::vector<const char*> ExtensionListForVulkan(
287 const std::set<std::string, std::less<>>& extensions) {
288 std::vector<const char*> output;
289 for (const auto& extension : extensions) {
290 output.push_back(extension.c_str());
291 }
292 return output;
293}
294
324} // Anonymous namespace 295} // Anonymous namespace
325 296
326Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, 297Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
327 const vk::InstanceDispatch& dld_) 298 const vk::InstanceDispatch& dld_)
328 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 299 : instance{instance_}, dld{dld_}, physical{physical_},
329 instance_version{properties.apiVersion}, supported_extensions{GetSupportedExtensions(
330 physical)},
331 format_properties(GetFormatProperties(physical)) { 300 format_properties(GetFormatProperties(physical)) {
332 CheckSuitability(surface != nullptr); 301 if (!GetSuitability(surface != nullptr)) {
302 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
303 }
333 SetupFamilies(surface); 304 SetupFamilies(surface);
334 SetupFeatures();
335 SetupProperties();
336
337 const auto queue_cis = GetDeviceQueueCreateInfos(); 305 const auto queue_cis = GetDeviceQueueCreateInfos();
338 const std::vector extensions = LoadExtensions(surface != nullptr);
339
340 VkPhysicalDeviceFeatures2 features2{
341 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
342 .pNext = nullptr,
343 .features{
344 .robustBufferAccess = true,
345 .fullDrawIndexUint32 = false,
346 .imageCubeArray = true,
347 .independentBlend = true,
348 .geometryShader = true,
349 .tessellationShader = true,
350 .sampleRateShading = true,
351 .dualSrcBlend = true,
352 .logicOp = true,
353 .multiDrawIndirect = true,
354 .drawIndirectFirstInstance = true,
355 .depthClamp = true,
356 .depthBiasClamp = true,
357 .fillModeNonSolid = true,
358 .depthBounds = is_depth_bounds_supported,
359 .wideLines = true,
360 .largePoints = true,
361 .alphaToOne = false,
362 .multiViewport = true,
363 .samplerAnisotropy = true,
364 .textureCompressionETC2 = false,
365 .textureCompressionASTC_LDR = is_optimal_astc_supported,
366 .textureCompressionBC = false,
367 .occlusionQueryPrecise = true,
368 .pipelineStatisticsQuery = false,
369 .vertexPipelineStoresAndAtomics = true,
370 .fragmentStoresAndAtomics = true,
371 .shaderTessellationAndGeometryPointSize = false,
372 .shaderImageGatherExtended = true,
373 .shaderStorageImageExtendedFormats = false,
374 .shaderStorageImageMultisample = is_shader_storage_image_multisample,
375 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
376 .shaderStorageImageWriteWithoutFormat = true,
377 .shaderUniformBufferArrayDynamicIndexing = false,
378 .shaderSampledImageArrayDynamicIndexing = false,
379 .shaderStorageBufferArrayDynamicIndexing = false,
380 .shaderStorageImageArrayDynamicIndexing = false,
381 .shaderClipDistance = true,
382 .shaderCullDistance = true,
383 .shaderFloat64 = is_shader_float64_supported,
384 .shaderInt64 = is_shader_int64_supported,
385 .shaderInt16 = is_shader_int16_supported,
386 .shaderResourceResidency = false,
387 .shaderResourceMinLod = false,
388 .sparseBinding = false,
389 .sparseResidencyBuffer = false,
390 .sparseResidencyImage2D = false,
391 .sparseResidencyImage3D = false,
392 .sparseResidency2Samples = false,
393 .sparseResidency4Samples = false,
394 .sparseResidency8Samples = false,
395 .sparseResidency16Samples = false,
396 .sparseResidencyAliased = false,
397 .variableMultisampleRate = false,
398 .inheritedQueries = false,
399 },
400 };
401 const void* first_next = &features2;
402 void** next = &features2.pNext;
403
404 VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{
405 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
406 .pNext = nullptr,
407 .timelineSemaphore = true,
408 };
409 SetNext(next, timeline_semaphore);
410
411 VkPhysicalDevice16BitStorageFeatures bit16_storage{
412 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
413 .pNext = nullptr,
414 .storageBuffer16BitAccess = true,
415 .uniformAndStorageBuffer16BitAccess = true,
416 .storagePushConstant16 = false,
417 .storageInputOutput16 = false,
418 };
419 SetNext(next, bit16_storage);
420
421 VkPhysicalDevice8BitStorageFeatures bit8_storage{
422 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
423 .pNext = nullptr,
424 .storageBuffer8BitAccess = true,
425 .uniformAndStorageBuffer8BitAccess = true,
426 .storagePushConstant8 = false,
427 };
428 SetNext(next, bit8_storage);
429
430 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{
431 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
432 .pNext = nullptr,
433 .robustBufferAccess2 = true,
434 .robustImageAccess2 = true,
435 .nullDescriptor = true,
436 };
437 SetNext(next, robustness2);
438
439 VkPhysicalDeviceHostQueryResetFeatures host_query_reset{
440 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
441 .pNext = nullptr,
442 .hostQueryReset = true,
443 };
444 SetNext(next, host_query_reset);
445
446 VkPhysicalDeviceVariablePointerFeatures variable_pointers{
447 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES,
448 .pNext = nullptr,
449 .variablePointersStorageBuffer = VK_TRUE,
450 .variablePointers = VK_TRUE,
451 };
452 SetNext(next, variable_pointers);
453
454 VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{
455 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES,
456 .pNext = nullptr,
457 .shaderDemoteToHelperInvocation = true,
458 };
459 SetNext(next, demote);
460
461 VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{
462 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES,
463 .pNext = nullptr,
464 .shaderDrawParameters = true,
465 };
466 SetNext(next, draw_parameters);
467
468 VkPhysicalDeviceShaderFloat16Int8Features float16_int8;
469 if (is_int8_supported || is_float16_supported) {
470 float16_int8 = {
471 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES,
472 .pNext = nullptr,
473 .shaderFloat16 = is_float16_supported,
474 .shaderInt8 = is_int8_supported,
475 };
476 SetNext(next, float16_int8);
477 }
478 if (!is_float16_supported) {
479 LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
480 }
481 if (!is_int8_supported) {
482 LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively");
483 }
484
485 if (!nv_viewport_swizzle) {
486 LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
487 }
488
489 if (!nv_viewport_array2) {
490 LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks");
491 }
492
493 if (!nv_geometry_shader_passthrough) {
494 LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
495 }
496
497 VkPhysicalDeviceUniformBufferStandardLayoutFeatures std430_layout;
498 if (khr_uniform_buffer_standard_layout) {
499 std430_layout = {
500 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES,
501 .pNext = nullptr,
502 .uniformBufferStandardLayout = true,
503 };
504 SetNext(next, std430_layout);
505 } else {
506 LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
507 }
508
509 VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
510 if (ext_index_type_uint8) {
511 index_type_uint8 = {
512 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT,
513 .pNext = nullptr,
514 .indexTypeUint8 = true,
515 };
516 SetNext(next, index_type_uint8);
517 } else {
518 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
519 }
520
521 VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart;
522 if (is_topology_list_restart_supported || is_patch_list_restart_supported) {
523 primitive_topology_list_restart = {
524 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT,
525 .pNext = nullptr,
526 .primitiveTopologyListRestart = is_topology_list_restart_supported,
527 .primitiveTopologyPatchListRestart = is_patch_list_restart_supported,
528 };
529 SetNext(next, primitive_topology_list_restart);
530 } else {
531 LOG_INFO(Render_Vulkan, "Device doesn't support list topology primitive restart");
532 }
533
534 VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
535 if (ext_transform_feedback) {
536 transform_feedback = {
537 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
538 .pNext = nullptr,
539 .transformFeedback = true,
540 .geometryStreams = true,
541 };
542 SetNext(next, transform_feedback);
543 } else {
544 LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
545 }
546
547 VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
548 if (ext_custom_border_color) {
549 custom_border = {
550 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
551 .pNext = nullptr,
552 .customBorderColors = VK_TRUE,
553 .customBorderColorWithoutFormat = VK_TRUE,
554 };
555 SetNext(next, custom_border);
556 } else {
557 LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
558 }
559
560 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state;
561 if (ext_extended_dynamic_state) {
562 dynamic_state = {
563 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT,
564 .pNext = nullptr,
565 .extendedDynamicState = VK_TRUE,
566 };
567 SetNext(next, dynamic_state);
568 } else {
569 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
570 }
571
572 VkPhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state_2;
573 if (ext_extended_dynamic_state_2) {
574 dynamic_state_2 = {
575 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT,
576 .pNext = nullptr,
577 .extendedDynamicState2 = VK_TRUE,
578 .extendedDynamicState2LogicOp = ext_extended_dynamic_state_2_extra ? VK_TRUE : VK_FALSE,
579 .extendedDynamicState2PatchControlPoints = VK_FALSE,
580 };
581 SetNext(next, dynamic_state_2);
582 } else {
583 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 2");
584 }
585 306
586 VkPhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3; 307 // GetSuitability has already configured the linked list of features for us.
587 if (ext_extended_dynamic_state_3) { 308 // Reuse it here.
588 dynamic_state_3 = { 309 const void* first_next = &features2;
589 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT,
590 .pNext = nullptr,
591 .extendedDynamicState3TessellationDomainOrigin = VK_FALSE,
592 .extendedDynamicState3DepthClampEnable =
593 ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE,
594 .extendedDynamicState3PolygonMode = VK_FALSE,
595 .extendedDynamicState3RasterizationSamples = VK_FALSE,
596 .extendedDynamicState3SampleMask = VK_FALSE,
597 .extendedDynamicState3AlphaToCoverageEnable = VK_FALSE,
598 .extendedDynamicState3AlphaToOneEnable = VK_FALSE,
599 .extendedDynamicState3LogicOpEnable =
600 ext_extended_dynamic_state_3_enables ? VK_TRUE : VK_FALSE,
601 .extendedDynamicState3ColorBlendEnable =
602 ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE,
603 .extendedDynamicState3ColorBlendEquation =
604 ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE,
605 .extendedDynamicState3ColorWriteMask =
606 ext_extended_dynamic_state_3_blend ? VK_TRUE : VK_FALSE,
607 .extendedDynamicState3RasterizationStream = VK_FALSE,
608 .extendedDynamicState3ConservativeRasterizationMode = VK_FALSE,
609 .extendedDynamicState3ExtraPrimitiveOverestimationSize = VK_FALSE,
610 .extendedDynamicState3DepthClipEnable = VK_FALSE,
611 .extendedDynamicState3SampleLocationsEnable = VK_FALSE,
612 .extendedDynamicState3ColorBlendAdvanced = VK_FALSE,
613 .extendedDynamicState3ProvokingVertexMode = VK_FALSE,
614 .extendedDynamicState3LineRasterizationMode = VK_FALSE,
615 .extendedDynamicState3LineStippleEnable = VK_FALSE,
616 .extendedDynamicState3DepthClipNegativeOneToOne = VK_FALSE,
617 .extendedDynamicState3ViewportWScalingEnable = VK_FALSE,
618 .extendedDynamicState3ViewportSwizzle = VK_FALSE,
619 .extendedDynamicState3CoverageToColorEnable = VK_FALSE,
620 .extendedDynamicState3CoverageToColorLocation = VK_FALSE,
621 .extendedDynamicState3CoverageModulationMode = VK_FALSE,
622 .extendedDynamicState3CoverageModulationTableEnable = VK_FALSE,
623 .extendedDynamicState3CoverageModulationTable = VK_FALSE,
624 .extendedDynamicState3CoverageReductionMode = VK_FALSE,
625 .extendedDynamicState3RepresentativeFragmentTestEnable = VK_FALSE,
626 .extendedDynamicState3ShadingRateImageEnable = VK_FALSE,
627 };
628 SetNext(next, dynamic_state_3);
629 } else {
630 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state 3");
631 }
632
633 VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
634 if (ext_line_rasterization) {
635 line_raster = {
636 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT,
637 .pNext = nullptr,
638 .rectangularLines = VK_TRUE,
639 .bresenhamLines = VK_FALSE,
640 .smoothLines = VK_TRUE,
641 .stippledRectangularLines = VK_FALSE,
642 .stippledBresenhamLines = VK_FALSE,
643 .stippledSmoothLines = VK_FALSE,
644 };
645 SetNext(next, line_raster);
646 } else {
647 LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines");
648 }
649
650 if (!ext_conservative_rasterization) {
651 LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization");
652 }
653
654 VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
655 if (ext_provoking_vertex) {
656 provoking_vertex = {
657 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
658 .pNext = nullptr,
659 .provokingVertexLast = VK_TRUE,
660 .transformFeedbackPreservesProvokingVertex = VK_TRUE,
661 };
662 SetNext(next, provoking_vertex);
663 } else {
664 LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last");
665 }
666
667 VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic;
668 if (ext_vertex_input_dynamic_state) {
669 vertex_input_dynamic = {
670 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT,
671 .pNext = nullptr,
672 .vertexInputDynamicState = VK_TRUE,
673 };
674 SetNext(next, vertex_input_dynamic);
675 } else {
676 LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
677 }
678
679 VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
680 if (ext_shader_atomic_int64) {
681 atomic_int64 = {
682 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES,
683 .pNext = nullptr,
684 .shaderBufferInt64Atomics = VK_TRUE,
685 .shaderSharedInt64Atomics = VK_TRUE,
686 };
687 SetNext(next, atomic_int64);
688 }
689
690 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
691 if (khr_workgroup_memory_explicit_layout && is_shader_int16_supported) {
692 workgroup_layout = {
693 .sType =
694 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
695 .pNext = nullptr,
696 .workgroupMemoryExplicitLayout = VK_TRUE,
697 .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
698 .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
699 .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
700 };
701 SetNext(next, workgroup_layout);
702 } else if (khr_workgroup_memory_explicit_layout) {
703 // TODO(lat9nq): Find a proper fix for this
704 LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_workgroup_memory_explicit_layout due to a "
705 "yuzu bug when host driver does not support 16-bit integers");
706 khr_workgroup_memory_explicit_layout = false;
707 }
708
709 VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties;
710 if (khr_pipeline_executable_properties) {
711 LOG_INFO(Render_Vulkan, "Enabling shader feedback, expect slower shader build times");
712 executable_properties = {
713 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR,
714 .pNext = nullptr,
715 .pipelineExecutableInfo = VK_TRUE,
716 };
717 SetNext(next, executable_properties);
718 }
719
720 if (!ext_depth_range_unrestricted) {
721 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
722 }
723
724 VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features;
725 if (ext_depth_clip_control) {
726 depth_clip_control_features = {
727 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT,
728 .pNext = nullptr,
729 .depthClipControl = VK_TRUE,
730 };
731 SetNext(next, depth_clip_control_features);
732 }
733 310
734 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; 311 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv{};
735 if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { 312 if (Settings::values.enable_nsight_aftermath && extensions.device_diagnostics_config) {
736 nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); 313 nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
737 314
738 diagnostics_nv = { 315 diagnostics_nv = {
@@ -744,33 +321,48 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
744 }; 321 };
745 first_next = &diagnostics_nv; 322 first_next = &diagnostics_nv;
746 } 323 }
747 logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
748 324
749 is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; 325 is_blit_depth_stencil_supported = TestDepthStencilBlits();
750 is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; 326 is_optimal_astc_supported = ComputeIsOptimalAstcSupported();
751 is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || 327 is_warp_potentially_bigger = !extensions.subgroup_size_control ||
752 properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; 328 properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize;
329
330 is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
331 is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
332 is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
333 properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
334
335 supports_d24_depth =
336 IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
337 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
753 338
754 CollectPhysicalMemoryInfo(); 339 CollectPhysicalMemoryInfo();
755 CollectTelemetryParameters();
756 CollectToolingInfo(); 340 CollectToolingInfo();
757 341
758 if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { 342 const VkDriverId driver_id = properties.driver.driverID;
759 const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; 343 const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
344 const bool is_amd_driver =
345 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
346 const bool is_amd = is_amd_driver || is_radv;
347 const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
348 const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
349 const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
760 350
351 if (is_nvidia) {
352 const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
761 const auto arch = GetNvidiaArchitecture(physical, supported_extensions); 353 const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
762 switch (arch) { 354 switch (arch) {
763 case NvidiaArchitecture::AmpereOrNewer: 355 case NvidiaArchitecture::AmpereOrNewer:
764 LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); 356 LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
765 is_float16_supported = false; 357 features.shader_float16_int8.shaderFloat16 = false;
766 break; 358 break;
767 case NvidiaArchitecture::Turing: 359 case NvidiaArchitecture::Turing:
768 break; 360 break;
769 case NvidiaArchitecture::VoltaOrOlder: 361 case NvidiaArchitecture::VoltaOrOlder:
770 if (nv_major_version < 527) { 362 if (nv_major_version < 527) {
771 LOG_WARNING(Render_Vulkan, 363 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
772 "Blacklisting Volta and older from VK_KHR_push_descriptor"); 364 extensions.push_descriptor = false;
773 khr_push_descriptor = false; 365 loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
774 } 366 }
775 break; 367 break;
776 } 368 }
@@ -779,75 +371,75 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
779 cant_blit_msaa = true; 371 cant_blit_msaa = true;
780 } 372 }
781 } 373 }
782 const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; 374 if (extensions.extended_dynamic_state && is_radv) {
783 if (ext_extended_dynamic_state && is_radv) {
784 // Mask driver version variant 375 // Mask driver version variant
785 const u32 version = (properties.driverVersion << 3) >> 3; 376 const u32 version = (properties.properties.driverVersion << 3) >> 3;
786 if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { 377 if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
787 LOG_WARNING(Render_Vulkan, 378 LOG_WARNING(Render_Vulkan,
788 "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); 379 "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
789 ext_extended_dynamic_state = false; 380 extensions.extended_dynamic_state = false;
381 loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
790 } 382 }
791 } 383 }
792 if (ext_vertex_input_dynamic_state && is_radv) { 384 if (extensions.extended_dynamic_state2 && is_radv) {
385 const u32 version = (properties.properties.driverVersion << 3) >> 3;
386 if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
387 LOG_WARNING(
388 Render_Vulkan,
389 "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2");
390 features.extended_dynamic_state2.extendedDynamicState2 = false;
391 features.extended_dynamic_state2.extendedDynamicState2LogicOp = false;
392 features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints = false;
393 extensions.extended_dynamic_state2 = false;
394 loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
395 }
396 }
397 if (extensions.vertex_input_dynamic_state && is_radv) {
793 // TODO(ameerj): Blacklist only offending driver versions 398 // TODO(ameerj): Blacklist only offending driver versions
794 // TODO(ameerj): Confirm if RDNA1 is affected 399 // TODO(ameerj): Confirm if RDNA1 is affected
795 const bool is_rdna2 = 400 const bool is_rdna2 =
796 IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); 401 supported_extensions.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
797 if (is_rdna2) { 402 if (is_rdna2) {
798 LOG_WARNING(Render_Vulkan, 403 LOG_WARNING(Render_Vulkan,
799 "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware"); 404 "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware");
800 ext_vertex_input_dynamic_state = false; 405 extensions.vertex_input_dynamic_state = false;
406 loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
801 } 407 }
802 } 408 }
803 if (ext_extended_dynamic_state_2 && is_radv) {
804 const u32 version = (properties.driverVersion << 3) >> 3;
805 if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) {
806 LOG_WARNING(
807 Render_Vulkan,
808 "RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2");
809 ext_extended_dynamic_state_2 = false;
810 ext_extended_dynamic_state_2_extra = false;
811 }
812 }
813 sets_per_pool = 64;
814 409
815 const bool is_amd = 410 sets_per_pool = 64;
816 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; 411 if (is_amd_driver) {
817 if (is_amd) {
818 // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2. 412 // AMD drivers need a higher amount of Sets per Pool in certain circunstances like in XC2.
819 sets_per_pool = 96; 413 sets_per_pool = 96;
820 // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken. 414 // Disable VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT on AMD GCN4 and lower as it is broken.
821 if (!is_float16_supported) { 415 if (!features.shader_float16_int8.shaderFloat16) {
822 LOG_WARNING( 416 LOG_WARNING(Render_Vulkan,
823 Render_Vulkan, 417 "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT");
824 "AMD GCN4 and earlier do not properly support VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT");
825 has_broken_cube_compatibility = true; 418 has_broken_cube_compatibility = true;
826 } 419 }
827 } 420 }
828 const bool is_amd_or_radv = is_amd || is_radv; 421 if (extensions.sampler_filter_minmax && is_amd) {
829 if (ext_sampler_filter_minmax && is_amd_or_radv) {
830 // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. 422 // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
831 if (!is_float16_supported) { 423 if (!features.shader_float16_int8.shaderFloat16) {
832 LOG_WARNING(Render_Vulkan, 424 LOG_WARNING(Render_Vulkan,
833 "Blacklisting AMD GCN4 and earlier for VK_EXT_sampler_filter_minmax"); 425 "AMD GCN4 and earlier have broken VK_EXT_sampler_filter_minmax");
834 ext_sampler_filter_minmax = false; 426 extensions.sampler_filter_minmax = false;
427 loaded_extensions.erase(VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME);
835 } 428 }
836 } 429 }
837 430
838 const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; 431 if (extensions.vertex_input_dynamic_state && is_intel_windows) {
839 const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; 432 const u32 version = (properties.properties.driverVersion << 3) >> 3;
840 if (ext_vertex_input_dynamic_state && is_intel_windows) {
841 const u32 version = (properties.driverVersion << 3) >> 3;
842 if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) { 433 if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
843 LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); 434 LOG_WARNING(Render_Vulkan, "Intel has broken VK_EXT_vertex_input_dynamic_state");
844 ext_vertex_input_dynamic_state = false; 435 extensions.vertex_input_dynamic_state = false;
436 loaded_extensions.erase(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
845 } 437 }
846 } 438 }
847 if (is_float16_supported && is_intel_windows) { 439 if (features.shader_float16_int8.shaderFloat16 && is_intel_windows) {
848 // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. 440 // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
849 LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); 441 LOG_WARNING(Render_Vulkan, "Intel has broken float16 math");
850 is_float16_supported = false; 442 features.shader_float16_int8.shaderFloat16 = false;
851 } 443 }
852 if (is_intel_windows) { 444 if (is_intel_windows) {
853 LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); 445 LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
@@ -858,9 +450,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
858 must_emulate_bgr565 = true; 450 must_emulate_bgr565 = true;
859 } 451 }
860 452
861 supports_d24_depth = 453 logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions),
862 IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, 454 first_next, dld);
863 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal);
864 455
865 graphics_queue = logical.GetQueue(graphics_family); 456 graphics_queue = logical.GetQueue(graphics_family);
866 present_queue = logical.GetQueue(present_family); 457 present_queue = logical.GetQueue(present_family);
@@ -915,7 +506,7 @@ void Device::SaveShader(std::span<const u32> spirv) const {
915 } 506 }
916} 507}
917 508
918bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { 509bool Device::ComputeIsOptimalAstcSupported() const {
919 // Disable for now to avoid converting ASTC twice. 510 // Disable for now to avoid converting ASTC twice.
920 static constexpr std::array astc_formats = { 511 static constexpr std::array astc_formats = {
921 VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, 512 VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
@@ -933,7 +524,7 @@ bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) co
933 VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, 524 VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
934 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, 525 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
935 }; 526 };
936 if (!features.textureCompressionASTC_LDR) { 527 if (!features.features.textureCompressionASTC_LDR) {
937 return false; 528 return false;
938 } 529 }
939 const auto format_feature_usage{ 530 const auto format_feature_usage{
@@ -971,7 +562,7 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
971} 562}
972 563
973std::string Device::GetDriverName() const { 564std::string Device::GetDriverName() const {
974 switch (driver_id) { 565 switch (properties.driver.driverID) {
975 case VK_DRIVER_ID_AMD_PROPRIETARY: 566 case VK_DRIVER_ID_AMD_PROPRIETARY:
976 return "AMD"; 567 return "AMD";
977 case VK_DRIVER_ID_AMD_OPEN_SOURCE: 568 case VK_DRIVER_ID_AMD_OPEN_SOURCE:
@@ -987,510 +578,336 @@ std::string Device::GetDriverName() const {
987 case VK_DRIVER_ID_MESA_LLVMPIPE: 578 case VK_DRIVER_ID_MESA_LLVMPIPE:
988 return "LAVAPIPE"; 579 return "LAVAPIPE";
989 default: 580 default:
990 return vendor_name; 581 return properties.driver.driverName;
991 } 582 }
992} 583}
993 584
994static std::vector<const char*> ExtensionsRequiredForInstanceVersion(u32 available_version) { 585bool Device::ShouldBoostClocks() const {
995 std::vector<const char*> extensions{REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()}; 586 const auto driver_id = properties.driver.driverID;
587 const auto vendor_id = properties.properties.vendorID;
588 const auto device_id = properties.properties.deviceID;
996 589
997 if (available_version < VK_API_VERSION_1_2) { 590 const bool validated_driver =
998 extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_2.begin(), 591 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
999 REQUIRED_EXTENSIONS_BEFORE_1_2.end()); 592 driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
1000 } 593 driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
594 driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
1001 595
1002 if (available_version < VK_API_VERSION_1_3) { 596 const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
1003 extensions.insert(extensions.end(), REQUIRED_EXTENSIONS_BEFORE_1_3.begin(),
1004 REQUIRED_EXTENSIONS_BEFORE_1_3.end());
1005 }
1006 597
1007 return extensions; 598 return validated_driver && !is_steam_deck;
1008} 599}
1009 600
1010void Device::CheckSuitability(bool requires_swapchain) const { 601bool Device::GetSuitability(bool requires_swapchain) {
1011 std::vector<const char*> required_extensions = 602 // Assume we will be suitable.
1012 ExtensionsRequiredForInstanceVersion(instance_version); 603 bool suitable = true;
1013 std::vector<const char*> available_extensions;
1014 604
1015 if (requires_swapchain) { 605 // Configure properties.
1016 required_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 606 properties.properties = physical.GetProperties();
1017 } 607
608 // Set instance version.
609 instance_version = properties.properties.apiVersion;
1018 610
611 // Minimum of API version 1.1 is required. (This is well-supported.)
612 ASSERT(instance_version >= VK_API_VERSION_1_1);
613
614 // Get available extensions.
1019 auto extension_properties = physical.EnumerateDeviceExtensionProperties(); 615 auto extension_properties = physical.EnumerateDeviceExtensionProperties();
1020 616
617 // Get the set of supported extensions.
618 supported_extensions.clear();
1021 for (const VkExtensionProperties& property : extension_properties) { 619 for (const VkExtensionProperties& property : extension_properties) {
1022 available_extensions.push_back(property.extensionName); 620 supported_extensions.insert(property.extensionName);
1023 } 621 }
1024 622
1025 bool has_all_required_extensions = true; 623 // Generate list of extensions to load.
1026 for (const char* requirement_name : required_extensions) { 624 loaded_extensions.clear();
1027 const bool found =
1028 std::ranges::any_of(available_extensions, [&](const char* extension_name) {
1029 return std::strcmp(requirement_name, extension_name) == 0;
1030 });
1031 625
1032 if (!found) { 626#define EXTENSION(prefix, macro_name, var_name) \
1033 LOG_ERROR(Render_Vulkan, "Missing required extension: {}", requirement_name); 627 if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \
1034 has_all_required_extensions = false; 628 loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
1035 } 629 extensions.var_name = true; \
1036 } 630 }
1037 631#define FEATURE_EXTENSION(prefix, struct_name, macro_name, var_name) \
1038 if (!has_all_required_extensions) { 632 if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \
1039 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); 633 loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
634 extensions.var_name = true; \
1040 } 635 }
1041 636
1042 struct LimitTuple { 637 if (instance_version < VK_API_VERSION_1_2) {
1043 u32 minimum; 638 FOR_EACH_VK_FEATURE_1_2(FEATURE_EXTENSION);
1044 u32 value; 639 }
1045 const char* name; 640 if (instance_version < VK_API_VERSION_1_3) {
1046 }; 641 FOR_EACH_VK_FEATURE_1_3(FEATURE_EXTENSION);
1047 const VkPhysicalDeviceLimits& limits{properties.limits};
1048 const std::array limits_report{
1049 LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
1050 LimitTuple{16, limits.maxViewports, "maxViewports"},
1051 LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
1052 LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
1053 };
1054 for (const auto& tuple : limits_report) {
1055 if (tuple.value < tuple.minimum) {
1056 LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name,
1057 tuple.minimum, tuple.value);
1058 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
1059 }
1060 } 642 }
1061 VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures demote{};
1062 demote.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES;
1063 demote.pNext = nullptr;
1064 643
1065 VkPhysicalDeviceVariablePointerFeatures variable_pointers{}; 644 FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION);
1066 variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES; 645 FOR_EACH_VK_EXTENSION(EXTENSION);
1067 variable_pointers.pNext = &demote; 646#ifdef _WIN32
647 FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
648#endif
1068 649
1069 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; 650#undef FEATURE_EXTENSION
1070 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; 651#undef EXTENSION
1071 robustness2.pNext = &variable_pointers;
1072 652
1073 VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore{}; 653 // Some extensions are mandatory. Check those.
1074 timeline_semaphore.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES; 654#define CHECK_EXTENSION(extension_name) \
1075 timeline_semaphore.pNext = &robustness2; 655 if (!loaded_extensions.contains(extension_name)) { \
656 LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \
657 suitable = false; \
658 }
1076 659
1077 VkPhysicalDevice16BitStorageFeatures bit16_storage{}; 660#define LOG_EXTENSION(extension_name) \
1078 bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; 661 if (!loaded_extensions.contains(extension_name)) { \
1079 bit16_storage.pNext = &timeline_semaphore; 662 LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \
663 }
1080 664
1081 VkPhysicalDevice8BitStorageFeatures bit8_storage{}; 665 FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION);
1082 bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; 666 FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION);
1083 bit8_storage.pNext = &bit16_storage; 667#ifdef _WIN32
668 FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION);
669#else
670 FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION);
671#endif
1084 672
1085 VkPhysicalDeviceHostQueryResetFeatures host_query_reset{}; 673 if (requires_swapchain) {
1086 host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES; 674 CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
1087 host_query_reset.pNext = &bit8_storage; 675 }
1088 676
1089 VkPhysicalDeviceShaderDrawParametersFeatures draw_parameters{}; 677#undef LOG_EXTENSION
1090 draw_parameters.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES; 678#undef CHECK_EXTENSION
1091 draw_parameters.pNext = &host_query_reset;
1092 679
1093 VkPhysicalDeviceFeatures2 features2{}; 680 // Generate the linked list of features to test.
1094 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 681 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1095 features2.pNext = &draw_parameters;
1096 682
1097 physical.GetFeatures2(features2); 683 // Set next pointer.
684 void** next = &features2.pNext;
1098 685
1099 const VkPhysicalDeviceFeatures& features{features2.features}; 686 // Test all features we know about. If the feature is not available in core at our
1100 std::array feature_report{ 687 // current API version, and was not enabled by an extension, skip testing the feature.
1101 std::make_pair(features.robustBufferAccess, "robustBufferAccess"), 688 // We set the structure sType explicitly here as it is zeroed by the constructor.
1102 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 689#define FEATURE(prefix, struct_name, macro_name, var_name) \
1103 std::make_pair(features.imageCubeArray, "imageCubeArray"), 690 features.var_name.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES; \
1104 std::make_pair(features.independentBlend, "independentBlend"), 691 SetNext(next, features.var_name);
1105 std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"),
1106 std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"),
1107 std::make_pair(features.depthClamp, "depthClamp"),
1108 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
1109 std::make_pair(features.largePoints, "largePoints"),
1110 std::make_pair(features.multiViewport, "multiViewport"),
1111 std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
1112 std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
1113 std::make_pair(features.wideLines, "wideLines"),
1114 std::make_pair(features.geometryShader, "geometryShader"),
1115 std::make_pair(features.tessellationShader, "tessellationShader"),
1116 std::make_pair(features.sampleRateShading, "sampleRateShading"),
1117 std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
1118 std::make_pair(features.logicOp, "logicOp"),
1119 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
1120 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
1121 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
1122 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
1123 "shaderStorageImageWriteWithoutFormat"),
1124 std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
1125 std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
1126 std::make_pair(variable_pointers.variablePointers, "variablePointers"),
1127 std::make_pair(variable_pointers.variablePointersStorageBuffer,
1128 "variablePointersStorageBuffer"),
1129 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
1130 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
1131 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
1132 std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
1133 std::make_pair(timeline_semaphore.timelineSemaphore, "timelineSemaphore"),
1134 std::make_pair(bit16_storage.storageBuffer16BitAccess, "storageBuffer16BitAccess"),
1135 std::make_pair(bit16_storage.uniformAndStorageBuffer16BitAccess,
1136 "uniformAndStorageBuffer16BitAccess"),
1137 std::make_pair(bit8_storage.storageBuffer8BitAccess, "storageBuffer8BitAccess"),
1138 std::make_pair(bit8_storage.uniformAndStorageBuffer8BitAccess,
1139 "uniformAndStorageBuffer8BitAccess"),
1140 std::make_pair(host_query_reset.hostQueryReset, "hostQueryReset"),
1141 std::make_pair(draw_parameters.shaderDrawParameters, "shaderDrawParameters"),
1142 };
1143 692
1144 bool has_all_required_features = true; 693#define EXT_FEATURE(prefix, struct_name, macro_name, var_name) \
1145 for (const auto& [is_supported, name] : feature_report) { 694 if (extensions.var_name) { \
1146 if (!is_supported) { 695 features.var_name.sType = \
1147 LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name); 696 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES_##prefix; \
1148 has_all_required_features = false; 697 SetNext(next, features.var_name); \
1149 }
1150 } 698 }
1151 699
1152 if (!has_all_required_features) { 700 FOR_EACH_VK_FEATURE_1_1(FEATURE);
1153 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 701 FOR_EACH_VK_FEATURE_EXT(EXT_FEATURE);
702 if (instance_version >= VK_API_VERSION_1_2) {
703 FOR_EACH_VK_FEATURE_1_2(FEATURE);
704 } else {
705 FOR_EACH_VK_FEATURE_1_2(EXT_FEATURE);
1154 } 706 }
1155} 707 if (instance_version >= VK_API_VERSION_1_3) {
1156 708 FOR_EACH_VK_FEATURE_1_3(FEATURE);
1157std::vector<const char*> Device::LoadExtensions(bool requires_surface) { 709 } else {
1158 std::vector<const char*> extensions = ExtensionsRequiredForInstanceVersion(instance_version); 710 FOR_EACH_VK_FEATURE_1_3(EXT_FEATURE);
1159 if (requires_surface) {
1160 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
1161 } 711 }
1162 712
1163 bool has_khr_shader_float16_int8{}; 713#undef EXT_FEATURE
1164 bool has_khr_workgroup_memory_explicit_layout{}; 714#undef FEATURE
1165 bool has_khr_pipeline_executable_properties{};
1166 bool has_khr_image_format_list{};
1167 bool has_khr_swapchain_mutable_format{};
1168 bool has_ext_subgroup_size_control{};
1169 bool has_ext_transform_feedback{};
1170 bool has_ext_custom_border_color{};
1171 bool has_ext_extended_dynamic_state{};
1172 bool has_ext_extended_dynamic_state_2{};
1173 bool has_ext_extended_dynamic_state_3{};
1174 bool has_ext_shader_atomic_int64{};
1175 bool has_ext_provoking_vertex{};
1176 bool has_ext_vertex_input_dynamic_state{};
1177 bool has_ext_line_rasterization{};
1178 bool has_ext_primitive_topology_list_restart{};
1179 bool has_ext_depth_clip_control{};
1180 for (const std::string& extension : supported_extensions) {
1181 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
1182 bool push) {
1183 if (extension != name) {
1184 return;
1185 }
1186 if (push) {
1187 extensions.push_back(name);
1188 }
1189 if (status) {
1190 status->get() = true;
1191 }
1192 };
1193 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
1194 test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true);
1195 test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME,
1196 true);
1197 test(khr_uniform_buffer_standard_layout,
1198 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
1199 test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
1200 test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
1201 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
1202 test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true);
1203 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
1204 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
1205 test(has_ext_primitive_topology_list_restart,
1206 VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME, true);
1207 test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
1208 test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
1209 true);
1210 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
1211 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
1212 test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
1213 true);
1214 test(has_ext_depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME, false);
1215 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
1216 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
1217 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
1218 test(has_ext_extended_dynamic_state_2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME,
1219 false);
1220 test(has_ext_extended_dynamic_state_3, VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME,
1221 false);
1222 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, true);
1223 test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
1224 test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME,
1225 false);
1226 test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
1227 test(has_khr_workgroup_memory_explicit_layout,
1228 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
1229 test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
1230 test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
1231 false);
1232 test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
1233 test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true);
1234 if (Settings::values.enable_nsight_aftermath) {
1235 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
1236 true);
1237 }
1238 if (Settings::values.renderer_shader_feedback) {
1239 test(has_khr_pipeline_executable_properties,
1240 VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, false);
1241 }
1242 }
1243 VkPhysicalDeviceFeatures2 features{};
1244 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1245
1246 VkPhysicalDeviceProperties2 physical_properties{};
1247 physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1248
1249 if (has_khr_shader_float16_int8) {
1250 VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features;
1251 float16_int8_features.sType =
1252 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
1253 float16_int8_features.pNext = nullptr;
1254 features.pNext = &float16_int8_features;
1255
1256 physical.GetFeatures2(features);
1257 is_float16_supported = float16_int8_features.shaderFloat16;
1258 is_int8_supported = float16_int8_features.shaderInt8;
1259 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
1260 }
1261 if (has_ext_subgroup_size_control) {
1262 VkPhysicalDeviceSubgroupSizeControlFeatures subgroup_features;
1263 subgroup_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES;
1264 subgroup_features.pNext = nullptr;
1265 features.pNext = &subgroup_features;
1266 physical.GetFeatures2(features);
1267
1268 VkPhysicalDeviceSubgroupSizeControlProperties subgroup_properties;
1269 subgroup_properties.sType =
1270 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
1271 subgroup_properties.pNext = nullptr;
1272 physical_properties.pNext = &subgroup_properties;
1273 physical.GetProperties2(physical_properties);
1274 715
1275 is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; 716 // Perform the feature test.
717 physical.GetFeatures2(features2);
718 features.features = features2.features;
1276 719
1277 if (subgroup_features.subgroupSizeControl && 720 // Some features are mandatory. Check those.
1278 subgroup_properties.minSubgroupSize <= GuestWarpSize && 721#define CHECK_FEATURE(feature, name) \
1279 subgroup_properties.maxSubgroupSize >= GuestWarpSize) { 722 if (!features.feature.name) { \
1280 extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); 723 LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \
1281 guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; 724 suitable = false; \
1282 ext_subgroup_size_control = true;
1283 }
1284 } else {
1285 is_warp_potentially_bigger = true;
1286 } 725 }
1287 if (has_ext_provoking_vertex) {
1288 VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
1289 provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
1290 provoking_vertex.pNext = nullptr;
1291 features.pNext = &provoking_vertex;
1292 physical.GetFeatures2(features);
1293
1294 if (provoking_vertex.provokingVertexLast &&
1295 provoking_vertex.transformFeedbackPreservesProvokingVertex) {
1296 extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
1297 ext_provoking_vertex = true;
1298 }
1299 }
1300 if (has_ext_vertex_input_dynamic_state) {
1301 VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input;
1302 vertex_input.sType =
1303 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
1304 vertex_input.pNext = nullptr;
1305 features.pNext = &vertex_input;
1306 physical.GetFeatures2(features);
1307
1308 if (vertex_input.vertexInputDynamicState) {
1309 extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
1310 ext_vertex_input_dynamic_state = true;
1311 }
1312 }
1313 if (has_ext_shader_atomic_int64) {
1314 VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
1315 atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES;
1316 atomic_int64.pNext = nullptr;
1317 features.pNext = &atomic_int64;
1318 physical.GetFeatures2(features);
1319
1320 if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
1321 extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
1322 ext_shader_atomic_int64 = true;
1323 }
1324 }
1325 if (has_ext_transform_feedback) {
1326 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
1327 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
1328 tfb_features.pNext = nullptr;
1329 features.pNext = &tfb_features;
1330 physical.GetFeatures2(features);
1331
1332 VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
1333 tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
1334 tfb_properties.pNext = nullptr;
1335 physical_properties.pNext = &tfb_properties;
1336 physical.GetProperties2(physical_properties);
1337 726
1338 if (tfb_features.transformFeedback && tfb_features.geometryStreams && 727#define LOG_FEATURE(feature, name) \
1339 tfb_properties.maxTransformFeedbackStreams >= 4 && 728 if (!features.feature.name) { \
1340 tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && 729 LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \
1341 tfb_properties.transformFeedbackDraw) {
1342 extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
1343 ext_transform_feedback = true;
1344 }
1345 }
1346 if (has_ext_custom_border_color) {
1347 VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
1348 border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
1349 border_features.pNext = nullptr;
1350 features.pNext = &border_features;
1351 physical.GetFeatures2(features);
1352
1353 if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) {
1354 extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
1355 ext_custom_border_color = true;
1356 }
1357 }
1358 if (has_ext_extended_dynamic_state) {
1359 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state;
1360 extended_dynamic_state.sType =
1361 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
1362 extended_dynamic_state.pNext = nullptr;
1363 features.pNext = &extended_dynamic_state;
1364 physical.GetFeatures2(features);
1365
1366 if (extended_dynamic_state.extendedDynamicState) {
1367 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
1368 ext_extended_dynamic_state = true;
1369 }
1370 }
1371 if (has_ext_extended_dynamic_state_2) {
1372 VkPhysicalDeviceExtendedDynamicState2FeaturesEXT extended_dynamic_state_2;
1373 extended_dynamic_state_2.sType =
1374 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT;
1375 extended_dynamic_state_2.pNext = nullptr;
1376 features.pNext = &extended_dynamic_state_2;
1377 physical.GetFeatures2(features);
1378
1379 if (extended_dynamic_state_2.extendedDynamicState2) {
1380 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
1381 ext_extended_dynamic_state_2 = true;
1382 ext_extended_dynamic_state_2_extra =
1383 extended_dynamic_state_2.extendedDynamicState2LogicOp;
1384 }
1385 } 730 }
1386 if (has_ext_extended_dynamic_state_3) { 731
1387 VkPhysicalDeviceExtendedDynamicState3FeaturesEXT extended_dynamic_state_3; 732 FOR_EACH_VK_RECOMMENDED_FEATURE(LOG_FEATURE);
1388 extended_dynamic_state_3.sType = 733 FOR_EACH_VK_MANDATORY_FEATURE(CHECK_FEATURE);
1389 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT; 734
1390 extended_dynamic_state_3.pNext = nullptr; 735#undef LOG_FEATURE
1391 features.pNext = &extended_dynamic_state_3; 736#undef CHECK_FEATURE
1392 physical.GetFeatures2(features); 737
1393 738 // Generate linked list of properties.
1394 ext_extended_dynamic_state_3_blend = 739 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1395 extended_dynamic_state_3.extendedDynamicState3ColorBlendEnable && 740
1396 extended_dynamic_state_3.extendedDynamicState3ColorBlendEquation && 741 // Set next pointer.
1397 extended_dynamic_state_3.extendedDynamicState3ColorWriteMask; 742 next = &properties2.pNext;
1398 743
1399 ext_extended_dynamic_state_3_enables = 744 // Get driver info.
1400 extended_dynamic_state_3.extendedDynamicState3DepthClampEnable && 745 properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
1401 extended_dynamic_state_3.extendedDynamicState3LogicOpEnable; 746 SetNext(next, properties.driver);
1402 747
1403 ext_extended_dynamic_state_3 = 748 // Retrieve relevant extension properties.
1404 ext_extended_dynamic_state_3_blend || ext_extended_dynamic_state_3_enables; 749 if (extensions.shader_float_controls) {
1405 if (ext_extended_dynamic_state_3) { 750 properties.float_controls.sType =
1406 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); 751 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
1407 } 752 SetNext(next, properties.float_controls);
1408 } 753 }
1409 if (has_ext_line_rasterization) { 754 if (extensions.push_descriptor) {
1410 VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; 755 properties.push_descriptor.sType =
1411 line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; 756 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
1412 line_raster.pNext = nullptr; 757 SetNext(next, properties.push_descriptor);
1413 features.pNext = &line_raster;
1414 physical.GetFeatures2(features);
1415 if (line_raster.rectangularLines && line_raster.smoothLines) {
1416 extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
1417 ext_line_rasterization = true;
1418 }
1419 } 758 }
1420 if (has_ext_depth_clip_control) { 759 if (extensions.subgroup_size_control) {
1421 VkPhysicalDeviceDepthClipControlFeaturesEXT depth_clip_control_features; 760 properties.subgroup_size_control.sType =
1422 depth_clip_control_features.sType = 761 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
1423 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT; 762 SetNext(next, properties.subgroup_size_control);
1424 depth_clip_control_features.pNext = nullptr;
1425 features.pNext = &depth_clip_control_features;
1426 physical.GetFeatures2(features);
1427
1428 if (depth_clip_control_features.depthClipControl) {
1429 extensions.push_back(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
1430 ext_depth_clip_control = true;
1431 }
1432 } 763 }
1433 if (has_khr_workgroup_memory_explicit_layout) { 764 if (extensions.transform_feedback) {
1434 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; 765 properties.transform_feedback.sType =
1435 layout.sType = 766 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
1436 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; 767 SetNext(next, properties.transform_feedback);
1437 layout.pNext = nullptr;
1438 features.pNext = &layout;
1439 physical.GetFeatures2(features);
1440
1441 if (layout.workgroupMemoryExplicitLayout &&
1442 layout.workgroupMemoryExplicitLayout8BitAccess &&
1443 layout.workgroupMemoryExplicitLayout16BitAccess &&
1444 layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
1445 extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
1446 khr_workgroup_memory_explicit_layout = true;
1447 }
1448 } 768 }
1449 if (has_khr_pipeline_executable_properties) { 769
1450 VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR executable_properties; 770 // Perform the property fetch.
1451 executable_properties.sType = 771 physical.GetProperties2(properties2);
1452 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR; 772 properties.properties = properties2.properties;
1453 executable_properties.pNext = nullptr; 773
1454 features.pNext = &executable_properties; 774 // Unload extensions if feature support is insufficient.
1455 physical.GetFeatures2(features); 775 RemoveUnsuitableExtensions();
1456 776
1457 if (executable_properties.pipelineExecutableInfo) { 777 // Check limits.
1458 extensions.push_back(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME); 778 struct Limit {
1459 khr_pipeline_executable_properties = true; 779 u32 minimum;
780 u32 value;
781 const char* name;
782 };
783
784 const VkPhysicalDeviceLimits& limits{properties.properties.limits};
785 const std::array limits_report{
786 Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
787 Limit{16, limits.maxViewports, "maxViewports"},
788 Limit{8, limits.maxColorAttachments, "maxColorAttachments"},
789 Limit{8, limits.maxClipDistances, "maxClipDistances"},
790 };
791
792 for (const auto& [min, value, name] : limits_report) {
793 if (value < min) {
794 LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", name, min, value);
795 suitable = false;
1460 } 796 }
1461 } 797 }
1462 if (has_ext_primitive_topology_list_restart) {
1463 VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT primitive_topology_list_restart{};
1464 primitive_topology_list_restart.sType =
1465 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT;
1466 primitive_topology_list_restart.pNext = nullptr;
1467 features.pNext = &primitive_topology_list_restart;
1468 physical.GetFeatures2(features);
1469
1470 is_topology_list_restart_supported =
1471 primitive_topology_list_restart.primitiveTopologyListRestart;
1472 is_patch_list_restart_supported =
1473 primitive_topology_list_restart.primitiveTopologyPatchListRestart;
1474 }
1475 if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
1476 extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
1477 extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
1478 khr_swapchain_mutable_format = true;
1479 }
1480 if (khr_push_descriptor) {
1481 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
1482 push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
1483 push_descriptor.pNext = nullptr;
1484 798
1485 physical_properties.pNext = &push_descriptor; 799 // Return whether we were suitable.
1486 physical.GetProperties2(physical_properties); 800 return suitable;
801}
1487 802
1488 max_push_descriptors = push_descriptor.maxPushDescriptors; 803void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) {
804 if (loaded_extensions.contains(extension_name) && !is_suitable) {
805 LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name);
806 loaded_extensions.erase(extension_name);
1489 } 807 }
808}
1490 809
1491 has_null_descriptor = true; 810void Device::RemoveUnsuitableExtensions() {
1492 811 // VK_EXT_custom_border_color
1493 return extensions; 812 extensions.custom_border_color = features.custom_border_color.customBorderColors &&
813 features.custom_border_color.customBorderColorWithoutFormat;
814 RemoveExtensionIfUnsuitable(extensions.custom_border_color,
815 VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
816
817 // VK_EXT_depth_clip_control
818 extensions.depth_clip_control = features.depth_clip_control.depthClipControl;
819 RemoveExtensionIfUnsuitable(extensions.depth_clip_control,
820 VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
821
822 // VK_EXT_extended_dynamic_state
823 extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState;
824 RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state,
825 VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
826
827 // VK_EXT_extended_dynamic_state2
828 extensions.extended_dynamic_state2 = features.extended_dynamic_state2.extendedDynamicState2;
829 RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state2,
830 VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
831
832 // VK_EXT_extended_dynamic_state3
833 dynamic_state3_blending =
834 features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable &&
835 features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation &&
836 features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask;
837 dynamic_state3_enables =
838 features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable &&
839 features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
840
841 extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables;
842 dynamic_state3_blending = dynamic_state3_blending && extensions.extended_dynamic_state3;
843 dynamic_state3_enables = dynamic_state3_enables && extensions.extended_dynamic_state3;
844 RemoveExtensionIfUnsuitable(extensions.extended_dynamic_state3,
845 VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
846
847 // VK_EXT_provoking_vertex
848 extensions.provoking_vertex =
849 features.provoking_vertex.provokingVertexLast &&
850 features.provoking_vertex.transformFeedbackPreservesProvokingVertex;
851 RemoveExtensionIfUnsuitable(extensions.provoking_vertex,
852 VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
853
854 // VK_KHR_shader_atomic_int64
855 extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
856 features.shader_atomic_int64.shaderSharedInt64Atomics;
857 RemoveExtensionIfUnsuitable(extensions.shader_atomic_int64,
858 VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
859
860 // VK_EXT_shader_demote_to_helper_invocation
861 extensions.shader_demote_to_helper_invocation =
862 features.shader_demote_to_helper_invocation.shaderDemoteToHelperInvocation;
863 RemoveExtensionIfUnsuitable(extensions.shader_demote_to_helper_invocation,
864 VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
865
866 // VK_EXT_subgroup_size_control
867 extensions.subgroup_size_control =
868 features.subgroup_size_control.subgroupSizeControl &&
869 properties.subgroup_size_control.minSubgroupSize <= GuestWarpSize &&
870 properties.subgroup_size_control.maxSubgroupSize >= GuestWarpSize;
871 RemoveExtensionIfUnsuitable(extensions.subgroup_size_control,
872 VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
873
874 // VK_EXT_transform_feedback
875 extensions.transform_feedback =
876 features.transform_feedback.transformFeedback &&
877 features.transform_feedback.geometryStreams &&
878 properties.transform_feedback.maxTransformFeedbackStreams >= 4 &&
879 properties.transform_feedback.maxTransformFeedbackBuffers > 0 &&
880 properties.transform_feedback.transformFeedbackQueries &&
881 properties.transform_feedback.transformFeedbackDraw;
882 RemoveExtensionIfUnsuitable(extensions.transform_feedback,
883 VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
884
885 // VK_EXT_vertex_input_dynamic_state
886 extensions.vertex_input_dynamic_state =
887 features.vertex_input_dynamic_state.vertexInputDynamicState;
888 RemoveExtensionIfUnsuitable(extensions.vertex_input_dynamic_state,
889 VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
890
891 // VK_KHR_pipeline_executable_properties
892 if (Settings::values.renderer_shader_feedback.GetValue()) {
893 extensions.pipeline_executable_properties =
894 features.pipeline_executable_properties.pipelineExecutableInfo;
895 RemoveExtensionIfUnsuitable(extensions.pipeline_executable_properties,
896 VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
897 } else {
898 extensions.pipeline_executable_properties = false;
899 loaded_extensions.erase(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
900 }
901
902 // VK_KHR_workgroup_memory_explicit_layout
903 extensions.workgroup_memory_explicit_layout =
904 features.features.shaderInt16 &&
905 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
906 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
907 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
908 features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
909 RemoveExtensionIfUnsuitable(extensions.workgroup_memory_explicit_layout,
910 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
1494} 911}
1495 912
1496void Device::SetupFamilies(VkSurfaceKHR surface) { 913void Device::SetupFamilies(VkSurfaceKHR surface) {
@@ -1520,55 +937,12 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
1520 LOG_ERROR(Render_Vulkan, "Device lacks a present queue"); 937 LOG_ERROR(Render_Vulkan, "Device lacks a present queue");
1521 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 938 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
1522 } 939 }
1523 graphics_family = *graphics; 940 if (graphics) {
1524 present_family = *present; 941 graphics_family = *graphics;
1525} 942 }
1526 943 if (present) {
1527void Device::SetupFeatures() { 944 present_family = *present;
1528 const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; 945 }
1529 is_depth_bounds_supported = features.depthBounds;
1530 is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat;
1531 is_shader_float64_supported = features.shaderFloat64;
1532 is_shader_int64_supported = features.shaderInt64;
1533 is_shader_int16_supported = features.shaderInt16;
1534 is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
1535 is_blit_depth_stencil_supported = TestDepthStencilBlits();
1536 is_optimal_astc_supported = IsOptimalAstcSupported(features);
1537
1538 const VkPhysicalDeviceLimits& limits{properties.limits};
1539 max_vertex_input_attributes = limits.maxVertexInputAttributes;
1540 max_vertex_input_bindings = limits.maxVertexInputBindings;
1541}
1542
1543void Device::SetupProperties() {
1544 float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
1545
1546 VkPhysicalDeviceProperties2KHR properties2{};
1547 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1548 properties2.pNext = &float_controls;
1549
1550 physical.GetProperties2(properties2);
1551}
1552
1553void Device::CollectTelemetryParameters() {
1554 VkPhysicalDeviceDriverProperties driver{
1555 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
1556 .pNext = nullptr,
1557 .driverID = {},
1558 .driverName = {},
1559 .driverInfo = {},
1560 .conformanceVersion = {},
1561 };
1562
1563 VkPhysicalDeviceProperties2 device_properties{
1564 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
1565 .pNext = &driver,
1566 .properties = {},
1567 };
1568 physical.GetProperties2(device_properties);
1569
1570 driver_id = driver.driverID;
1571 vendor_name = driver.driverName;
1572} 946}
1573 947
1574u64 Device::GetDeviceMemoryUsage() const { 948u64 Device::GetDeviceMemoryUsage() const {
@@ -1586,7 +960,8 @@ u64 Device::GetDeviceMemoryUsage() const {
1586void Device::CollectPhysicalMemoryInfo() { 960void Device::CollectPhysicalMemoryInfo() {
1587 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; 961 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
1588 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; 962 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
1589 const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); 963 const auto mem_info =
964 physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr);
1590 const auto& mem_properties = mem_info.memoryProperties; 965 const auto& mem_properties = mem_info.memoryProperties;
1591 const size_t num_properties = mem_properties.memoryHeapCount; 966 const size_t num_properties = mem_properties.memoryHeapCount;
1592 device_access_memory = 0; 967 device_access_memory = 0;
@@ -1602,7 +977,7 @@ void Device::CollectPhysicalMemoryInfo() {
1602 if (is_heap_local) { 977 if (is_heap_local) {
1603 local_memory += mem_properties.memoryHeaps[element].size; 978 local_memory += mem_properties.memoryHeaps[element].size;
1604 } 979 }
1605 if (ext_memory_budget) { 980 if (extensions.memory_budget) {
1606 device_initial_usage += budget.heapUsage[element]; 981 device_initial_usage += budget.heapUsage[element];
1607 device_access_memory += budget.heapBudget[element]; 982 device_access_memory += budget.heapBudget[element];
1608 continue; 983 continue;
@@ -1618,7 +993,7 @@ void Device::CollectPhysicalMemoryInfo() {
1618} 993}
1619 994
1620void Device::CollectToolingInfo() { 995void Device::CollectToolingInfo() {
1621 if (!ext_tooling_info) { 996 if (!extensions.tooling_info) {
1622 return; 997 return;
1623 } 998 }
1624 auto tools{physical.GetPhysicalDeviceToolProperties()}; 999 auto tools{physical.GetPhysicalDeviceToolProperties()};
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 6042046e1..4cfb20bc2 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -3,6 +3,7 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <set>
6#include <span> 7#include <span>
7#include <string> 8#include <string>
8#include <unordered_map> 9#include <unordered_map>
@@ -11,6 +12,155 @@
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
13 14
15// Define all features which may be used by the implementation here.
16// Vulkan version in the macro describes the minimum version required for feature availability.
17// If the Vulkan version is lower than the required version, the named extension is required.
18#define FOR_EACH_VK_FEATURE_1_1(FEATURE) \
19 FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \
20 FEATURE(KHR, 16BitStorage, 16BIT_STORAGE, bit16_storage) \
21 FEATURE(KHR, ShaderAtomicInt64, SHADER_ATOMIC_INT64, shader_atomic_int64) \
22 FEATURE(KHR, ShaderDrawParameters, SHADER_DRAW_PARAMETERS, shader_draw_parameters) \
23 FEATURE(KHR, ShaderFloat16Int8, SHADER_FLOAT16_INT8, shader_float16_int8) \
24 FEATURE(KHR, UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, \
25 uniform_buffer_standard_layout) \
26 FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer)
27
28#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \
29 FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \
30 FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \
31 FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
32
33#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \
34 FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \
35 shader_demote_to_helper_invocation)
36
37// Define all features which may be used by the implementation and require an extension here.
38#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
39 FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \
40 FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \
41 FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
42 FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
43 FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
44 FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
45 FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
46 FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
47 primitive_topology_list_restart) \
48 FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
49 FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
50 FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
51 FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
52 FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
53 pipeline_executable_properties) \
54 FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
55 workgroup_memory_explicit_layout)
56
57// Define miscellaneous extensions which may be used by the implementation here.
58#define FOR_EACH_VK_EXTENSION(EXTENSION) \
59 EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \
60 EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \
61 EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \
62 EXTENSION(EXT, ROBUSTNESS_2, robustness_2) \
63 EXTENSION(EXT, SAMPLER_FILTER_MINMAX, sampler_filter_minmax) \
64 EXTENSION(EXT, SHADER_STENCIL_EXPORT, shader_stencil_export) \
65 EXTENSION(EXT, SHADER_VIEWPORT_INDEX_LAYER, shader_viewport_index_layer) \
66 EXTENSION(EXT, TOOLING_INFO, tooling_info) \
67 EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \
68 EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \
69 EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \
70 EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \
71 EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \
72 EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
73 EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
74 EXTENSION(KHR, SWAPCHAIN, swapchain) \
75 EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
76 EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
77 EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
78 EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
79 EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle)
80
81#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \
82 EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32)
83
84// Define extensions which must be supported.
85#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
86 EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
87 EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
88 EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \
89 EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
90 EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
91
92#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \
93 EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME)
94
95#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \
96 EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME)
97
98// Define extensions where the absence of the extension may result in a degraded experience.
99#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
100 EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
101 EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \
102 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
103 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
104 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
105 EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
106 EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
107 EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \
108 EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \
109 EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME)
110
111// Define features which must be supported.
112#define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \
113 FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
114 FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
115 FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
116 FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
117 FEATURE_NAME(features, depthBiasClamp) \
118 FEATURE_NAME(features, depthClamp) \
119 FEATURE_NAME(features, drawIndirectFirstInstance) \
120 FEATURE_NAME(features, dualSrcBlend) \
121 FEATURE_NAME(features, fillModeNonSolid) \
122 FEATURE_NAME(features, fragmentStoresAndAtomics) \
123 FEATURE_NAME(features, geometryShader) \
124 FEATURE_NAME(features, imageCubeArray) \
125 FEATURE_NAME(features, independentBlend) \
126 FEATURE_NAME(features, largePoints) \
127 FEATURE_NAME(features, logicOp) \
128 FEATURE_NAME(features, multiDrawIndirect) \
129 FEATURE_NAME(features, multiViewport) \
130 FEATURE_NAME(features, occlusionQueryPrecise) \
131 FEATURE_NAME(features, robustBufferAccess) \
132 FEATURE_NAME(features, samplerAnisotropy) \
133 FEATURE_NAME(features, sampleRateShading) \
134 FEATURE_NAME(features, shaderClipDistance) \
135 FEATURE_NAME(features, shaderCullDistance) \
136 FEATURE_NAME(features, shaderImageGatherExtended) \
137 FEATURE_NAME(features, shaderStorageImageWriteWithoutFormat) \
138 FEATURE_NAME(features, tessellationShader) \
139 FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \
140 FEATURE_NAME(features, wideLines) \
141 FEATURE_NAME(host_query_reset, hostQueryReset) \
142 FEATURE_NAME(robustness2, nullDescriptor) \
143 FEATURE_NAME(robustness2, robustBufferAccess2) \
144 FEATURE_NAME(robustness2, robustImageAccess2) \
145 FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \
146 FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \
147 FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
148 FEATURE_NAME(variable_pointer, variablePointers) \
149 FEATURE_NAME(variable_pointer, variablePointersStorageBuffer)
150
151// Define features where the absence of the feature may result in a degraded experience.
152#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \
153 FEATURE_NAME(custom_border_color, customBorderColors) \
154 FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
155 FEATURE_NAME(index_type_uint8, indexTypeUint8) \
156 FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
157 FEATURE_NAME(provoking_vertex, provokingVertexLast) \
158 FEATURE_NAME(shader_float16_int8, shaderFloat16) \
159 FEATURE_NAME(shader_float16_int8, shaderInt8) \
160 FEATURE_NAME(transform_feedback, transformFeedback) \
161 FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \
162 FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState)
163
14namespace Vulkan { 164namespace Vulkan {
15 165
16class NsightAftermathTracker; 166class NsightAftermathTracker;
@@ -88,67 +238,69 @@ public:
88 238
89 /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. 239 /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
90 u32 ApiVersion() const { 240 u32 ApiVersion() const {
91 return properties.apiVersion; 241 return properties.properties.apiVersion;
92 } 242 }
93 243
94 /// Returns the current driver version provided in Vulkan-formatted version numbers. 244 /// Returns the current driver version provided in Vulkan-formatted version numbers.
95 u32 GetDriverVersion() const { 245 u32 GetDriverVersion() const {
96 return properties.driverVersion; 246 return properties.properties.driverVersion;
97 } 247 }
98 248
99 /// Returns the device name. 249 /// Returns the device name.
100 std::string_view GetModelName() const { 250 std::string_view GetModelName() const {
101 return properties.deviceName; 251 return properties.properties.deviceName;
102 } 252 }
103 253
104 /// Returns the driver ID. 254 /// Returns the driver ID.
105 VkDriverIdKHR GetDriverID() const { 255 VkDriverIdKHR GetDriverID() const {
106 return driver_id; 256 return properties.driver.driverID;
107 } 257 }
108 258
259 bool ShouldBoostClocks() const;
260
109 /// Returns uniform buffer alignment requeriment. 261 /// Returns uniform buffer alignment requeriment.
110 VkDeviceSize GetUniformBufferAlignment() const { 262 VkDeviceSize GetUniformBufferAlignment() const {
111 return properties.limits.minUniformBufferOffsetAlignment; 263 return properties.properties.limits.minUniformBufferOffsetAlignment;
112 } 264 }
113 265
114 /// Returns storage alignment requeriment. 266 /// Returns storage alignment requeriment.
115 VkDeviceSize GetStorageBufferAlignment() const { 267 VkDeviceSize GetStorageBufferAlignment() const {
116 return properties.limits.minStorageBufferOffsetAlignment; 268 return properties.properties.limits.minStorageBufferOffsetAlignment;
117 } 269 }
118 270
119 /// Returns the maximum range for storage buffers. 271 /// Returns the maximum range for storage buffers.
120 VkDeviceSize GetMaxStorageBufferRange() const { 272 VkDeviceSize GetMaxStorageBufferRange() const {
121 return properties.limits.maxStorageBufferRange; 273 return properties.properties.limits.maxStorageBufferRange;
122 } 274 }
123 275
124 /// Returns the maximum size for push constants. 276 /// Returns the maximum size for push constants.
125 VkDeviceSize GetMaxPushConstantsSize() const { 277 VkDeviceSize GetMaxPushConstantsSize() const {
126 return properties.limits.maxPushConstantsSize; 278 return properties.properties.limits.maxPushConstantsSize;
127 } 279 }
128 280
129 /// Returns the maximum size for shared memory. 281 /// Returns the maximum size for shared memory.
130 u32 GetMaxComputeSharedMemorySize() const { 282 u32 GetMaxComputeSharedMemorySize() const {
131 return properties.limits.maxComputeSharedMemorySize; 283 return properties.properties.limits.maxComputeSharedMemorySize;
132 } 284 }
133 285
134 /// Returns float control properties of the device. 286 /// Returns float control properties of the device.
135 const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { 287 const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
136 return float_controls; 288 return properties.float_controls;
137 } 289 }
138 290
139 /// Returns true if ASTC is natively supported. 291 /// Returns true if ASTC is natively supported.
140 bool IsOptimalAstcSupported() const { 292 bool IsOptimalAstcSupported() const {
141 return is_optimal_astc_supported; 293 return features.features.textureCompressionASTC_LDR;
142 } 294 }
143 295
144 /// Returns true if the device supports float16 natively. 296 /// Returns true if the device supports float16 natively.
145 bool IsFloat16Supported() const { 297 bool IsFloat16Supported() const {
146 return is_float16_supported; 298 return features.shader_float16_int8.shaderFloat16;
147 } 299 }
148 300
149 /// Returns true if the device supports int8 natively. 301 /// Returns true if the device supports int8 natively.
150 bool IsInt8Supported() const { 302 bool IsInt8Supported() const {
151 return is_int8_supported; 303 return features.shader_float16_int8.shaderInt8;
152 } 304 }
153 305
154 /// Returns true if the device warp size can potentially be bigger than guest's warp size. 306 /// Returns true if the device warp size can potentially be bigger than guest's warp size.
@@ -158,32 +310,32 @@ public:
158 310
159 /// Returns true if the device can be forced to use the guest warp size. 311 /// Returns true if the device can be forced to use the guest warp size.
160 bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { 312 bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
161 return guest_warp_stages & stage; 313 return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
162 } 314 }
163 315
164 /// Returns the maximum number of push descriptors. 316 /// Returns the maximum number of push descriptors.
165 u32 MaxPushDescriptors() const { 317 u32 MaxPushDescriptors() const {
166 return max_push_descriptors; 318 return properties.push_descriptor.maxPushDescriptors;
167 } 319 }
168 320
169 /// Returns true if formatless image load is supported. 321 /// Returns true if formatless image load is supported.
170 bool IsFormatlessImageLoadSupported() const { 322 bool IsFormatlessImageLoadSupported() const {
171 return is_formatless_image_load_supported; 323 return features.features.shaderStorageImageReadWithoutFormat;
172 } 324 }
173 325
174 /// Returns true if shader int64 is supported. 326 /// Returns true if shader int64 is supported.
175 bool IsShaderInt64Supported() const { 327 bool IsShaderInt64Supported() const {
176 return is_shader_int64_supported; 328 return features.features.shaderInt64;
177 } 329 }
178 330
179 /// Returns true if shader int16 is supported. 331 /// Returns true if shader int16 is supported.
180 bool IsShaderInt16Supported() const { 332 bool IsShaderInt16Supported() const {
181 return is_shader_int16_supported; 333 return features.features.shaderInt16;
182 } 334 }
183 335
184 // Returns true if depth bounds is supported. 336 // Returns true if depth bounds is supported.
185 bool IsDepthBoundsSupported() const { 337 bool IsDepthBoundsSupported() const {
186 return is_depth_bounds_supported; 338 return features.features.depthBounds;
187 } 339 }
188 340
189 /// Returns true when blitting from and to depth stencil images is supported. 341 /// Returns true when blitting from and to depth stencil images is supported.
@@ -193,151 +345,151 @@ public:
193 345
194 /// Returns true if the device supports VK_NV_viewport_swizzle. 346 /// Returns true if the device supports VK_NV_viewport_swizzle.
195 bool IsNvViewportSwizzleSupported() const { 347 bool IsNvViewportSwizzleSupported() const {
196 return nv_viewport_swizzle; 348 return extensions.viewport_swizzle;
197 } 349 }
198 350
199 /// Returns true if the device supports VK_NV_viewport_array2. 351 /// Returns true if the device supports VK_NV_viewport_array2.
200 bool IsNvViewportArray2Supported() const { 352 bool IsNvViewportArray2Supported() const {
201 return nv_viewport_array2; 353 return extensions.viewport_array2;
202 } 354 }
203 355
204 /// Returns true if the device supports VK_NV_geometry_shader_passthrough. 356 /// Returns true if the device supports VK_NV_geometry_shader_passthrough.
205 bool IsNvGeometryShaderPassthroughSupported() const { 357 bool IsNvGeometryShaderPassthroughSupported() const {
206 return nv_geometry_shader_passthrough; 358 return extensions.geometry_shader_passthrough;
207 } 359 }
208 360
209 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. 361 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
210 bool IsKhrUniformBufferStandardLayoutSupported() const { 362 bool IsKhrUniformBufferStandardLayoutSupported() const {
211 return khr_uniform_buffer_standard_layout; 363 return extensions.uniform_buffer_standard_layout;
212 } 364 }
213 365
214 /// Returns true if the device supports VK_KHR_push_descriptor. 366 /// Returns true if the device supports VK_KHR_push_descriptor.
215 bool IsKhrPushDescriptorSupported() const { 367 bool IsKhrPushDescriptorSupported() const {
216 return khr_push_descriptor; 368 return extensions.push_descriptor;
217 } 369 }
218 370
219 /// Returns true if VK_KHR_pipeline_executable_properties is enabled. 371 /// Returns true if VK_KHR_pipeline_executable_properties is enabled.
220 bool IsKhrPipelineExecutablePropertiesEnabled() const { 372 bool IsKhrPipelineExecutablePropertiesEnabled() const {
221 return khr_pipeline_executable_properties; 373 return extensions.pipeline_executable_properties;
222 } 374 }
223 375
224 /// Returns true if VK_KHR_swapchain_mutable_format is enabled. 376 /// Returns true if VK_KHR_swapchain_mutable_format is enabled.
225 bool IsKhrSwapchainMutableFormatEnabled() const { 377 bool IsKhrSwapchainMutableFormatEnabled() const {
226 return khr_swapchain_mutable_format; 378 return extensions.swapchain_mutable_format;
227 } 379 }
228 380
229 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. 381 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
230 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { 382 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
231 return khr_workgroup_memory_explicit_layout; 383 return extensions.workgroup_memory_explicit_layout;
232 } 384 }
233 385
234 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. 386 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
235 bool IsTopologyListPrimitiveRestartSupported() const { 387 bool IsTopologyListPrimitiveRestartSupported() const {
236 return is_topology_list_restart_supported; 388 return features.primitive_topology_list_restart.primitiveTopologyListRestart;
237 } 389 }
238 390
239 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. 391 /// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
240 bool IsPatchListPrimitiveRestartSupported() const { 392 bool IsPatchListPrimitiveRestartSupported() const {
241 return is_patch_list_restart_supported; 393 return features.primitive_topology_list_restart.primitiveTopologyPatchListRestart;
242 } 394 }
243 395
244 /// Returns true if the device supports VK_EXT_index_type_uint8. 396 /// Returns true if the device supports VK_EXT_index_type_uint8.
245 bool IsExtIndexTypeUint8Supported() const { 397 bool IsExtIndexTypeUint8Supported() const {
246 return ext_index_type_uint8; 398 return extensions.index_type_uint8;
247 } 399 }
248 400
249 /// Returns true if the device supports VK_EXT_sampler_filter_minmax. 401 /// Returns true if the device supports VK_EXT_sampler_filter_minmax.
250 bool IsExtSamplerFilterMinmaxSupported() const { 402 bool IsExtSamplerFilterMinmaxSupported() const {
251 return ext_sampler_filter_minmax; 403 return extensions.sampler_filter_minmax;
252 } 404 }
253 405
254 /// Returns true if the device supports VK_EXT_depth_range_unrestricted. 406 /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
255 bool IsExtDepthRangeUnrestrictedSupported() const { 407 bool IsExtDepthRangeUnrestrictedSupported() const {
256 return ext_depth_range_unrestricted; 408 return extensions.depth_range_unrestricted;
257 } 409 }
258 410
259 /// Returns true if the device supports VK_EXT_depth_clip_control. 411 /// Returns true if the device supports VK_EXT_depth_clip_control.
260 bool IsExtDepthClipControlSupported() const { 412 bool IsExtDepthClipControlSupported() const {
261 return ext_depth_clip_control; 413 return extensions.depth_clip_control;
262 } 414 }
263 415
264 /// Returns true if the device supports VK_EXT_shader_viewport_index_layer. 416 /// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
265 bool IsExtShaderViewportIndexLayerSupported() const { 417 bool IsExtShaderViewportIndexLayerSupported() const {
266 return ext_shader_viewport_index_layer; 418 return extensions.shader_viewport_index_layer;
267 } 419 }
268 420
269 /// Returns true if the device supports VK_EXT_subgroup_size_control. 421 /// Returns true if the device supports VK_EXT_subgroup_size_control.
270 bool IsExtSubgroupSizeControlSupported() const { 422 bool IsExtSubgroupSizeControlSupported() const {
271 return ext_subgroup_size_control; 423 return extensions.subgroup_size_control;
272 } 424 }
273 425
274 /// Returns true if the device supports VK_EXT_transform_feedback. 426 /// Returns true if the device supports VK_EXT_transform_feedback.
275 bool IsExtTransformFeedbackSupported() const { 427 bool IsExtTransformFeedbackSupported() const {
276 return ext_transform_feedback; 428 return extensions.transform_feedback;
277 } 429 }
278 430
279 /// Returns true if the device supports VK_EXT_custom_border_color. 431 /// Returns true if the device supports VK_EXT_custom_border_color.
280 bool IsExtCustomBorderColorSupported() const { 432 bool IsExtCustomBorderColorSupported() const {
281 return ext_custom_border_color; 433 return extensions.custom_border_color;
282 } 434 }
283 435
284 /// Returns true if the device supports VK_EXT_extended_dynamic_state. 436 /// Returns true if the device supports VK_EXT_extended_dynamic_state.
285 bool IsExtExtendedDynamicStateSupported() const { 437 bool IsExtExtendedDynamicStateSupported() const {
286 return ext_extended_dynamic_state; 438 return extensions.extended_dynamic_state;
287 } 439 }
288 440
289 /// Returns true if the device supports VK_EXT_extended_dynamic_state2. 441 /// Returns true if the device supports VK_EXT_extended_dynamic_state2.
290 bool IsExtExtendedDynamicState2Supported() const { 442 bool IsExtExtendedDynamicState2Supported() const {
291 return ext_extended_dynamic_state_2; 443 return extensions.extended_dynamic_state2;
292 } 444 }
293 445
294 bool IsExtExtendedDynamicState2ExtrasSupported() const { 446 bool IsExtExtendedDynamicState2ExtrasSupported() const {
295 return ext_extended_dynamic_state_2_extra; 447 return features.extended_dynamic_state2.extendedDynamicState2LogicOp;
296 } 448 }
297 449
298 /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 450 /// Returns true if the device supports VK_EXT_extended_dynamic_state3.
299 bool IsExtExtendedDynamicState3Supported() const { 451 bool IsExtExtendedDynamicState3Supported() const {
300 return ext_extended_dynamic_state_3; 452 return extensions.extended_dynamic_state3;
301 } 453 }
302 454
303 /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 455 /// Returns true if the device supports VK_EXT_extended_dynamic_state3.
304 bool IsExtExtendedDynamicState3BlendingSupported() const { 456 bool IsExtExtendedDynamicState3BlendingSupported() const {
305 return ext_extended_dynamic_state_3_blend; 457 return dynamic_state3_blending;
306 } 458 }
307 459
308 /// Returns true if the device supports VK_EXT_extended_dynamic_state3. 460 /// Returns true if the device supports VK_EXT_extended_dynamic_state3.
309 bool IsExtExtendedDynamicState3EnablesSupported() const { 461 bool IsExtExtendedDynamicState3EnablesSupported() const {
310 return ext_extended_dynamic_state_3_enables; 462 return dynamic_state3_enables;
311 } 463 }
312 464
313 /// Returns true if the device supports VK_EXT_line_rasterization. 465 /// Returns true if the device supports VK_EXT_line_rasterization.
314 bool IsExtLineRasterizationSupported() const { 466 bool IsExtLineRasterizationSupported() const {
315 return ext_line_rasterization; 467 return extensions.line_rasterization;
316 } 468 }
317 469
318 /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. 470 /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
319 bool IsExtVertexInputDynamicStateSupported() const { 471 bool IsExtVertexInputDynamicStateSupported() const {
320 return ext_vertex_input_dynamic_state; 472 return extensions.vertex_input_dynamic_state;
321 } 473 }
322 474
323 /// Returns true if the device supports VK_EXT_shader_stencil_export. 475 /// Returns true if the device supports VK_EXT_shader_stencil_export.
324 bool IsExtShaderStencilExportSupported() const { 476 bool IsExtShaderStencilExportSupported() const {
325 return ext_shader_stencil_export; 477 return extensions.shader_stencil_export;
326 } 478 }
327 479
328 /// Returns true if the device supports VK_EXT_conservative_rasterization. 480 /// Returns true if the device supports VK_EXT_conservative_rasterization.
329 bool IsExtConservativeRasterizationSupported() const { 481 bool IsExtConservativeRasterizationSupported() const {
330 return ext_conservative_rasterization; 482 return extensions.conservative_rasterization;
331 } 483 }
332 484
333 /// Returns true if the device supports VK_EXT_provoking_vertex. 485 /// Returns true if the device supports VK_EXT_provoking_vertex.
334 bool IsExtProvokingVertexSupported() const { 486 bool IsExtProvokingVertexSupported() const {
335 return ext_provoking_vertex; 487 return extensions.provoking_vertex;
336 } 488 }
337 489
338 /// Returns true if the device supports VK_KHR_shader_atomic_int64. 490 /// Returns true if the device supports VK_KHR_shader_atomic_int64.
339 bool IsExtShaderAtomicInt64Supported() const { 491 bool IsExtShaderAtomicInt64Supported() const {
340 return ext_shader_atomic_int64; 492 return extensions.shader_atomic_int64;
341 } 493 }
342 494
343 /// Returns the minimum supported version of SPIR-V. 495 /// Returns the minimum supported version of SPIR-V.
@@ -345,7 +497,7 @@ public:
345 if (instance_version >= VK_API_VERSION_1_3) { 497 if (instance_version >= VK_API_VERSION_1_3) {
346 return 0x00010600U; 498 return 0x00010600U;
347 } 499 }
348 if (khr_spirv_1_4) { 500 if (extensions.spirv_1_4) {
349 return 0x00010400U; 501 return 0x00010400U;
350 } 502 }
351 return 0x00010000U; 503 return 0x00010000U;
@@ -363,11 +515,11 @@ public:
363 515
364 /// Returns the vendor name reported from Vulkan. 516 /// Returns the vendor name reported from Vulkan.
365 std::string_view GetVendorName() const { 517 std::string_view GetVendorName() const {
366 return vendor_name; 518 return properties.driver.driverName;
367 } 519 }
368 520
369 /// Returns the list of available extensions. 521 /// Returns the list of available extensions.
370 const std::vector<std::string>& GetAvailableExtensions() const { 522 const std::set<std::string, std::less<>>& GetAvailableExtensions() const {
371 return supported_extensions; 523 return supported_extensions;
372 } 524 }
373 525
@@ -376,7 +528,7 @@ public:
376 } 528 }
377 529
378 bool CanReportMemoryUsage() const { 530 bool CanReportMemoryUsage() const {
379 return ext_memory_budget; 531 return extensions.memory_budget;
380 } 532 }
381 533
382 u64 GetDeviceMemoryUsage() const; 534 u64 GetDeviceMemoryUsage() const;
@@ -398,36 +550,29 @@ public:
398 } 550 }
399 551
400 bool HasNullDescriptor() const { 552 bool HasNullDescriptor() const {
401 return has_null_descriptor; 553 return features.robustness2.nullDescriptor;
402 } 554 }
403 555
404 u32 GetMaxVertexInputAttributes() const { 556 u32 GetMaxVertexInputAttributes() const {
405 return max_vertex_input_attributes; 557 return properties.properties.limits.maxVertexInputAttributes;
406 } 558 }
407 559
408 u32 GetMaxVertexInputBindings() const { 560 u32 GetMaxVertexInputBindings() const {
409 return max_vertex_input_bindings; 561 return properties.properties.limits.maxVertexInputBindings;
410 } 562 }
411 563
412private: 564private:
413 /// Checks if the physical device is suitable. 565 /// Checks if the physical device is suitable and configures the object state
414 void CheckSuitability(bool requires_swapchain) const; 566 /// with all necessary info about its properties.
567 bool GetSuitability(bool requires_swapchain);
415 568
416 /// Loads extensions into a vector and stores available ones in this object. 569 // Remove extensions which have incomplete feature support.
417 std::vector<const char*> LoadExtensions(bool requires_surface); 570 void RemoveUnsuitableExtensions();
571 void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
418 572
419 /// Sets up queue families. 573 /// Sets up queue families.
420 void SetupFamilies(VkSurfaceKHR surface); 574 void SetupFamilies(VkSurfaceKHR surface);
421 575
422 /// Sets up device features.
423 void SetupFeatures();
424
425 /// Sets up device properties.
426 void SetupProperties();
427
428 /// Collects telemetry information from the device.
429 void CollectTelemetryParameters();
430
431 /// Collects information about attached tools. 576 /// Collects information about attached tools.
432 void CollectToolingInfo(); 577 void CollectToolingInfo();
433 578
@@ -438,91 +583,93 @@ private:
438 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 583 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
439 584
440 /// Returns true if ASTC textures are natively supported. 585 /// Returns true if ASTC textures are natively supported.
441 bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; 586 bool ComputeIsOptimalAstcSupported() const;
442 587
443 /// Returns true if the device natively supports blitting depth stencil images. 588 /// Returns true if the device natively supports blitting depth stencil images.
444 bool TestDepthStencilBlits() const; 589 bool TestDepthStencilBlits() const;
445 590
446 VkInstance instance; ///< Vulkan instance. 591private:
447 vk::DeviceDispatch dld; ///< Device function pointers. 592 VkInstance instance; ///< Vulkan instance.
448 vk::PhysicalDevice physical; ///< Physical device. 593 vk::DeviceDispatch dld; ///< Device function pointers.
449 VkPhysicalDeviceProperties properties; ///< Device properties. 594 vk::PhysicalDevice physical; ///< Physical device.
450 VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. 595 vk::Device logical; ///< Logical device.
451 vk::Device logical; ///< Logical device. 596 vk::Queue graphics_queue; ///< Main graphics queue.
452 vk::Queue graphics_queue; ///< Main graphics queue. 597 vk::Queue present_queue; ///< Main present queue.
453 vk::Queue present_queue; ///< Main present queue. 598 u32 instance_version{}; ///< Vulkan instance version.
454 u32 instance_version{}; ///< Vulkan onstance version. 599 u32 graphics_family{}; ///< Main graphics queue family index.
455 u32 graphics_family{}; ///< Main graphics queue family index. 600 u32 present_family{}; ///< Main present queue family index.
456 u32 present_family{}; ///< Main present queue family index. 601
457 VkDriverIdKHR driver_id{}; ///< Driver ID. 602 struct Extensions {
458 VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 603#define EXTENSION(prefix, macro_name, var_name) bool var_name{};
459 u64 device_access_memory{}; ///< Total size of device local memory in bytes. 604#define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{};
460 u32 max_push_descriptors{}; ///< Maximum number of push descriptors 605
461 u32 sets_per_pool{}; ///< Sets per Description Pool 606 FOR_EACH_VK_FEATURE_1_1(FEATURE);
462 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 607 FOR_EACH_VK_FEATURE_1_2(FEATURE);
463 bool is_float16_supported{}; ///< Support for float16 arithmetic. 608 FOR_EACH_VK_FEATURE_1_3(FEATURE);
464 bool is_int8_supported{}; ///< Support for int8 arithmetic. 609 FOR_EACH_VK_FEATURE_EXT(FEATURE);
465 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 610 FOR_EACH_VK_EXTENSION(EXTENSION);
466 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 611 FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
467 bool is_depth_bounds_supported{}; ///< Support for depth bounds. 612
468 bool is_shader_float64_supported{}; ///< Support for float64. 613#undef EXTENSION
469 bool is_shader_int64_supported{}; ///< Support for int64. 614#undef FEATURE
470 bool is_shader_int16_supported{}; ///< Support for int16. 615 };
471 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. 616
472 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. 617 struct Features {
473 bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list 618#define FEATURE_CORE(prefix, struct_name, macro_name, var_name) \
474 ///< topologies. 619 VkPhysicalDevice##struct_name##Features var_name{};
475 bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. 620#define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \
476 bool is_integrated{}; ///< Is GPU an iGPU. 621 VkPhysicalDevice##struct_name##Features##prefix var_name{};
477 bool is_virtual{}; ///< Is GPU a virtual GPU. 622
478 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. 623 FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE);
479 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 624 FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE);
480 bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. 625 FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE);
481 bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. 626 FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT);
482 bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. 627
483 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. 628#undef FEATURE_CORE
484 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. 629#undef FEATURE_EXT
485 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. 630
486 bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. 631 VkPhysicalDeviceFeatures features{};
487 bool khr_pipeline_executable_properties{}; ///< Support for executable properties. 632 };
488 bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. 633
489 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 634 struct Properties {
490 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 635 VkPhysicalDeviceDriverProperties driver{};
491 bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control 636 VkPhysicalDeviceFloatControlsProperties float_controls{};
492 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 637 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
493 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 638 VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
494 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. 639 VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
495 bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. 640
496 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 641 VkPhysicalDeviceProperties properties{};
497 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 642 };
498 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 643
499 bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2. 644 Extensions extensions{};
500 bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2. 645 Features features{};
501 bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3. 646 Properties properties{};
502 bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3. 647
503 bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3. 648 VkPhysicalDeviceFeatures2 features2{};
504 bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. 649 VkPhysicalDeviceProperties2 properties2{};
505 bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. 650
506 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 651 // Misc features
507 bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. 652 bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats.
508 bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. 653 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
509 bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. 654 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
510 bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. 655 bool is_integrated{}; ///< Is GPU an iGPU.
511 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 656 bool is_virtual{}; ///< Is GPU a virtual GPU.
512 bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit 657 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
513 bool has_renderdoc{}; ///< Has RenderDoc attached 658 bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit
514 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 659 bool has_renderdoc{}; ///< Has RenderDoc attached
515 bool supports_d24_depth{}; ///< Supports D24 depth buffers. 660 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
516 bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. 661 bool supports_d24_depth{}; ///< Supports D24 depth buffers.
517 bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. 662 bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
518 bool has_null_descriptor{}; ///< Has support for null descriptors. 663 bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
519 u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline 664 bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
520 u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline 665 bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
666 u64 device_access_memory{}; ///< Total size of device local memory in bytes.
667 u32 sets_per_pool{}; ///< Sets per Description Pool
521 668
522 // Telemetry parameters 669 // Telemetry parameters
523 std::string vendor_name; ///< Device's driver name. 670 std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
524 std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. 671 std::set<std::string, std::less<>> loaded_extensions; ///< Loaded Vulkan extensions.
525 std::vector<size_t> valid_heap_memory; ///< Heaps used. 672 std::vector<size_t> valid_heap_memory; ///< Heaps used.
526 673
527 /// Format properties dictionary. 674 /// Format properties dictionary.
528 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 675 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
diff --git a/src/yuzu/Info.plist b/src/yuzu/Info.plist
index 0eb377926..f05f3186c 100644
--- a/src/yuzu/Info.plist
+++ b/src/yuzu/Info.plist
@@ -34,6 +34,8 @@ SPDX-License-Identifier: GPL-2.0-or-later
34 <string></string> 34 <string></string>
35 <key>CSResourcesFileMapped</key> 35 <key>CSResourcesFileMapped</key>
36 <true/> 36 <true/>
37 <key>LSApplicationCategoryType</key>
38 <string>public.app-category.games</string>
37 <key>LSRequiresCarbon</key> 39 <key>LSRequiresCarbon</key>
38 <true/> 40 <true/>
39 <key>NSHumanReadableCopyright</key> 41 <key>NSHumanReadableCopyright</key>
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index fbfa3ba35..0db62baa3 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -690,6 +690,7 @@ void Config::ReadRendererValues() {
690 qt_config->beginGroup(QStringLiteral("Renderer")); 690 qt_config->beginGroup(QStringLiteral("Renderer"));
691 691
692 ReadGlobalSetting(Settings::values.renderer_backend); 692 ReadGlobalSetting(Settings::values.renderer_backend);
693 ReadGlobalSetting(Settings::values.renderer_force_max_clock);
693 ReadGlobalSetting(Settings::values.vulkan_device); 694 ReadGlobalSetting(Settings::values.vulkan_device);
694 ReadGlobalSetting(Settings::values.fullscreen_mode); 695 ReadGlobalSetting(Settings::values.fullscreen_mode);
695 ReadGlobalSetting(Settings::values.aspect_ratio); 696 ReadGlobalSetting(Settings::values.aspect_ratio);
@@ -1306,6 +1307,9 @@ void Config::SaveRendererValues() {
1306 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), 1307 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
1307 static_cast<u32>(Settings::values.renderer_backend.GetDefault()), 1308 static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
1308 Settings::values.renderer_backend.UsingGlobal()); 1309 Settings::values.renderer_backend.UsingGlobal());
1310 WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()),
1311 static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)),
1312 static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault()));
1309 WriteGlobalSetting(Settings::values.vulkan_device); 1313 WriteGlobalSetting(Settings::values.vulkan_device);
1310 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), 1314 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
1311 static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), 1315 static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)),
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index a3fbe2ad0..fdf8485ce 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -25,6 +25,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
25 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 25 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
26 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 26 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
27 27
28 ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
28 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); 29 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
29 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); 30 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
30 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); 31 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
@@ -39,6 +40,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
39 Settings::values.max_anisotropy.GetValue()); 40 Settings::values.max_anisotropy.GetValue());
40 } else { 41 } else {
41 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); 42 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
43 ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock,
44 &Settings::values.renderer_force_max_clock);
42 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, 45 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
43 &Settings::values.max_anisotropy); 46 &Settings::values.max_anisotropy);
44 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, 47 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
@@ -50,6 +53,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
50 53
51void ConfigureGraphicsAdvanced::ApplyConfiguration() { 54void ConfigureGraphicsAdvanced::ApplyConfiguration() {
52 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); 55 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
56 ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
57 ui->renderer_force_max_clock,
58 renderer_force_max_clock);
53 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, 59 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
54 ui->anisotropic_filtering_combobox); 60 ui->anisotropic_filtering_combobox);
55 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); 61 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
@@ -81,6 +87,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
81 // Disable if not global (only happens during game) 87 // Disable if not global (only happens during game)
82 if (Settings::IsConfiguringGlobal()) { 88 if (Settings::IsConfiguringGlobal()) {
83 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); 89 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
90 ui->renderer_force_max_clock->setEnabled(
91 Settings::values.renderer_force_max_clock.UsingGlobal());
84 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); 92 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
85 ui->use_asynchronous_shaders->setEnabled( 93 ui->use_asynchronous_shaders->setEnabled(
86 Settings::values.use_asynchronous_shaders.UsingGlobal()); 94 Settings::values.use_asynchronous_shaders.UsingGlobal());
@@ -95,6 +103,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
95 return; 103 return;
96 } 104 }
97 105
106 ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
107 Settings::values.renderer_force_max_clock,
108 renderer_force_max_clock);
98 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); 109 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
99 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, 110 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
100 Settings::values.use_asynchronous_shaders, 111 Settings::values.use_asynchronous_shaders,
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index 891efc068..df557d585 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -36,6 +36,7 @@ private:
36 36
37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; 37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
38 38
39 ConfigurationShared::CheckState renderer_force_max_clock;
39 ConfigurationShared::CheckState use_vsync; 40 ConfigurationShared::CheckState use_vsync;
40 ConfigurationShared::CheckState use_asynchronous_shaders; 41 ConfigurationShared::CheckState use_asynchronous_shaders;
41 ConfigurationShared::CheckState use_fast_gpu_time; 42 ConfigurationShared::CheckState use_fast_gpu_time;
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index ccbdcf08f..061885e30 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -70,6 +70,16 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QCheckBox" name="renderer_force_max_clock">
74 <property name="toolTip">
75 <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
76 </property>
77 <property name="text">
78 <string>Force maximum clocks (Vulkan only)</string>
79 </property>
80 </widget>
81 </item>
82 <item>
73 <widget class="QCheckBox" name="use_vsync"> 83 <widget class="QCheckBox" name="use_vsync">
74 <property name="toolTip"> 84 <property name="toolTip">
75 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> 85 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 4f1d5e79e..571eacf9f 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -1839,9 +1839,11 @@ void GMainWindow::OnEmulationStopTimeExpired() {
1839 1839
1840void GMainWindow::OnEmulationStopped() { 1840void GMainWindow::OnEmulationStopped() {
1841 shutdown_timer.stop(); 1841 shutdown_timer.stop();
1842 emu_thread->disconnect(); 1842 if (emu_thread) {
1843 emu_thread->wait(); 1843 emu_thread->disconnect();
1844 emu_thread = nullptr; 1844 emu_thread->wait();
1845 emu_thread.reset();
1846 }
1845 1847
1846 if (shutdown_dialog) { 1848 if (shutdown_dialog) {
1847 shutdown_dialog->deleteLater(); 1849 shutdown_dialog->deleteLater();
@@ -3029,6 +3031,8 @@ void GMainWindow::OnStopGame() {
3029 3031
3030 if (OnShutdownBegin()) { 3032 if (OnShutdownBegin()) {
3031 OnShutdownBeginDialog(); 3033 OnShutdownBeginDialog();
3034 } else {
3035 OnEmulationStopped();
3032 } 3036 }
3033} 3037}
3034 3038
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index b2d690bb6..527017282 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -296,6 +296,7 @@ void Config::ReadValues() {
296 296
297 // Renderer 297 // Renderer
298 ReadSetting("Renderer", Settings::values.renderer_backend); 298 ReadSetting("Renderer", Settings::values.renderer_backend);
299 ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
299 ReadSetting("Renderer", Settings::values.renderer_debug); 300 ReadSetting("Renderer", Settings::values.renderer_debug);
300 ReadSetting("Renderer", Settings::values.renderer_shader_feedback); 301 ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
301 ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); 302 ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);