summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/shader_recompiler/CMakeLists.txt2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp115
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h21
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h14
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp10
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp175
-rw-r--r--src/shader_recompiler/environment.h4
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp46
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h6
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.cpp6
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc18
-rw-r--r--src/shader_recompiler/frontend/ir/program.h2
-rw-r--r--src/shader_recompiler/frontend/maxwell/program.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp197
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp16
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp6
-rw-r--r--src/shader_recompiler/profile.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp47
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp34
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h42
20 files changed, 730 insertions, 36 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 55b846c84..003cbefb1 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC
14 backend/spirv/emit_spirv_logical.cpp 14 backend/spirv/emit_spirv_logical.cpp
15 backend/spirv/emit_spirv_memory.cpp 15 backend/spirv/emit_spirv_memory.cpp
16 backend/spirv/emit_spirv_select.cpp 16 backend/spirv/emit_spirv_select.cpp
17 backend/spirv/emit_spirv_shared_memory.cpp
17 backend/spirv/emit_spirv_special.cpp 18 backend/spirv/emit_spirv_special.cpp
18 backend/spirv/emit_spirv_undefined.cpp 19 backend/spirv/emit_spirv_undefined.cpp
19 backend/spirv/emit_spirv_warp.cpp 20 backend/spirv/emit_spirv_warp.cpp
@@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC
111 frontend/maxwell/translate/impl/load_constant.cpp 112 frontend/maxwell/translate/impl/load_constant.cpp
112 frontend/maxwell/translate/impl/load_effective_address.cpp 113 frontend/maxwell/translate/impl/load_effective_address.cpp
113 frontend/maxwell/translate/impl/load_store_attribute.cpp 114 frontend/maxwell/translate/impl/load_store_attribute.cpp
115 frontend/maxwell/translate/impl/load_store_local_shared.cpp
114 frontend/maxwell/translate/impl/load_store_memory.cpp 116 frontend/maxwell/translate/impl/load_store_memory.cpp
115 frontend/maxwell/translate/impl/logic_operation.cpp 117 frontend/maxwell/translate/impl/logic_operation.cpp
116 frontend/maxwell/translate/impl/logic_operation_three_input.cpp 118 frontend/maxwell/translate/impl/logic_operation_three_input.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index a8ca33c1d..96d0e9b4d 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -9,6 +9,7 @@
9#include <fmt/format.h> 9#include <fmt/format.h>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/div_ceil.h"
12#include "shader_recompiler/backend/spirv/emit_context.h" 13#include "shader_recompiler/backend/spirv/emit_context.h"
13 14
14namespace Shader::Backend::SPIRV { 15namespace Shader::Backend::SPIRV {
@@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
96} 97}
97 98
98EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) 99EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
99 : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} { 100 : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
100 AddCapability(spv::Capability::Shader); 101 AddCapability(spv::Capability::Shader);
101 DefineCommonTypes(program.info); 102 DefineCommonTypes(program.info);
102 DefineCommonConstants(); 103 DefineCommonConstants();
103 DefineInterfaces(program.info); 104 DefineInterfaces(program.info);
105 DefineLocalMemory(program);
106 DefineSharedMemory(program);
104 DefineConstantBuffers(program.info, binding); 107 DefineConstantBuffers(program.info, binding);
105 DefineStorageBuffers(program.info, binding); 108 DefineStorageBuffers(program.info, binding);
106 DefineTextures(program.info, binding); 109 DefineTextures(program.info, binding);
@@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) {
143 F32.Define(*this, TypeFloat(32), "f32"); 146 F32.Define(*this, TypeFloat(32), "f32");
144 U32.Define(*this, TypeInt(32, false), "u32"); 147 U32.Define(*this, TypeInt(32, false), "u32");
145 148
149 private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
150
146 input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); 151 input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
147 input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); 152 input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
148 input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); 153 input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
@@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) {
184 DefineOutputs(info); 189 DefineOutputs(info);
185} 190}
186 191
192void EmitContext::DefineLocalMemory(const IR::Program& program) {
193 if (program.local_memory_size == 0) {
194 return;
195 }
196 const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
197 const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
198 const Id pointer{TypePointer(spv::StorageClass::Private, type)};
199 local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
200 if (profile.supported_spirv >= 0x00010400) {
201 interfaces.push_back(local_memory);
202 }
203}
204
205void EmitContext::DefineSharedMemory(const IR::Program& program) {
206 if (program.shared_memory_size == 0) {
207 return;
208 }
209 const auto make{[&](Id element_type, u32 element_size) {
210 const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
211 const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))};
212 Decorate(array_type, spv::Decoration::ArrayStride, element_size);
213
214 const Id struct_type{TypeStruct(array_type)};
215 MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U);
216 Decorate(struct_type, spv::Decoration::Block);
217
218 const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)};
219 const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)};
220 const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)};
221 Decorate(variable, spv::Decoration::Aliased);
222 interfaces.push_back(variable);
223
224 return std::make_pair(variable, element_pointer);
225 }};
226 if (profile.support_explicit_workgroup_layout) {
227 AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
228 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
229 if (program.info.uses_int8) {
230 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
231 std::tie(shared_memory_u8, shared_u8) = make(U8, 1);
232 }
233 if (program.info.uses_int16) {
234 AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
235 std::tie(shared_memory_u16, shared_u16) = make(U16, 2);
236 }
237 std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4);
238 std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8);
239 std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16);
240 }
241 const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
242 const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
243 const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)};
244 shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
245 shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup);
246 interfaces.push_back(shared_memory_u32);
247
248 const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
249 const auto make_function{[&](u32 mask, u32 size) {
250 const Id loop_header{OpLabel()};
251 const Id continue_block{OpLabel()};
252 const Id merge_block{OpLabel()};
253
254 const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
255 const Id offset{OpFunctionParameter(U32[1])};
256 const Id insert_value{OpFunctionParameter(U32[1])};
257 AddLabel();
258 OpBranch(loop_header);
259
260 AddLabel(loop_header);
261 const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))};
262 const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))};
263 const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))};
264 const Id count{Constant(U32[1], size)};
265 OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
266 OpBranch(continue_block);
267
268 AddLabel(continue_block);
269 const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
270 const Id old_value{OpLoad(U32[1], word_pointer)};
271 const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
272 const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U),
273 u32_zero_value, u32_zero_value, new_value,
274 old_value)};
275 const Id success{OpIEqual(U1, atomic_res, old_value)};
276 OpBranchConditional(success, merge_block, loop_header);
277
278 AddLabel(merge_block);
279 OpReturn();
280 OpFunctionEnd();
281 return func;
282 }};
283 if (program.info.uses_int8) {
284 shared_store_u8_func = make_function(24, 8);
285 }
286 if (program.info.uses_int16) {
287 shared_store_u16_func = make_function(16, 16);
288 }
289}
290
187void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { 291void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
188 if (info.constant_buffer_descriptors.empty()) { 292 if (info.constant_buffer_descriptors.empty()) {
189 return; 293 return;
@@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
234 Decorate(id, spv::Decoration::Binding, binding); 338 Decorate(id, spv::Decoration::Binding, binding);
235 Decorate(id, spv::Decoration::DescriptorSet, 0U); 339 Decorate(id, spv::Decoration::DescriptorSet, 0U);
236 Name(id, fmt::format("ssbo{}", index)); 340 Name(id, fmt::format("ssbo{}", index));
341 if (profile.supported_spirv >= 0x00010400) {
342 interfaces.push_back(id);
343 }
237 std::fill_n(ssbos.data() + index, desc.count, id); 344 std::fill_n(ssbos.data() + index, desc.count, id);
238 index += desc.count; 345 index += desc.count;
239 binding += desc.count; 346 binding += desc.count;
@@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) {
261 .image_type{image_type}, 368 .image_type{image_type},
262 }); 369 });
263 } 370 }
371 if (profile.supported_spirv >= 0x00010400) {
372 interfaces.push_back(id);
373 }
264 binding += desc.count; 374 binding += desc.count;
265 } 375 }
266} 376}
@@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions:
363 for (size_t i = 0; i < desc.count; ++i) { 473 for (size_t i = 0; i < desc.count; ++i) {
364 cbufs[desc.index + i].*member_type = id; 474 cbufs[desc.index + i].*member_type = id;
365 } 475 }
476 if (profile.supported_spirv >= 0x00010400) {
477 interfaces.push_back(id);
478 }
366 binding += desc.count; 479 binding += desc.count;
367 } 480 }
368} 481}
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 01b7b665d..1a4e8221a 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -73,6 +73,14 @@ public:
73 73
74 UniformDefinitions uniform_types; 74 UniformDefinitions uniform_types;
75 75
76 Id private_u32{};
77
78 Id shared_u8{};
79 Id shared_u16{};
80 Id shared_u32{};
81 Id shared_u32x2{};
82 Id shared_u32x4{};
83
76 Id input_f32{}; 84 Id input_f32{};
77 Id input_u32{}; 85 Id input_u32{};
78 Id input_s32{}; 86 Id input_s32{};
@@ -96,6 +104,17 @@ public:
96 Id base_vertex{}; 104 Id base_vertex{};
97 Id front_face{}; 105 Id front_face{};
98 106
107 Id local_memory{};
108
109 Id shared_memory_u8{};
110 Id shared_memory_u16{};
111 Id shared_memory_u32{};
112 Id shared_memory_u32x2{};
113 Id shared_memory_u32x4{};
114
115 Id shared_store_u8_func{};
116 Id shared_store_u16_func{};
117
99 Id input_position{}; 118 Id input_position{};
100 std::array<Id, 32> input_generics{}; 119 std::array<Id, 32> input_generics{};
101 120
@@ -111,6 +130,8 @@ private:
111 void DefineCommonTypes(const Info& info); 130 void DefineCommonTypes(const Info& info);
112 void DefineCommonConstants(); 131 void DefineCommonConstants();
113 void DefineInterfaces(const Info& info); 132 void DefineInterfaces(const Info& info);
133 void DefineLocalMemory(const IR::Program& program);
134 void DefineSharedMemory(const IR::Program& program);
114 void DefineConstantBuffers(const Info& info, u32& binding); 135 void DefineConstantBuffers(const Info& info, u32& binding);
115 void DefineStorageBuffers(const Info& info, u32& binding); 136 void DefineStorageBuffers(const Info& info, u32& binding);
116 void DefineTextures(const Info& info, u32& binding); 137 void DefineTextures(const Info& info, u32& binding);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 837f0e858..4f62af959 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx);
58void EmitSetOFlag(EmitContext& ctx); 58void EmitSetOFlag(EmitContext& ctx);
59Id EmitWorkgroupId(EmitContext& ctx); 59Id EmitWorkgroupId(EmitContext& ctx);
60Id EmitLocalInvocationId(EmitContext& ctx); 60Id EmitLocalInvocationId(EmitContext& ctx);
61Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
62void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
61Id EmitUndefU1(EmitContext& ctx); 63Id EmitUndefU1(EmitContext& ctx);
62Id EmitUndefU8(EmitContext& ctx); 64Id EmitUndefU8(EmitContext& ctx);
63Id EmitUndefU16(EmitContext& ctx); 65Id EmitUndefU16(EmitContext& ctx);
@@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
94 Id value); 96 Id value);
95void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, 97void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
96 Id value); 98 Id value);
99Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
100Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
101Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
102Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
103Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
104Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
105Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
106void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
107void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
108void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
109void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
110void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
97Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); 111Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
98Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); 112Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
99Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); 113Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 4cbc2aec1..52dcef8a4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
238 return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); 238 return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
239} 239}
240 240
241Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
242 const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
243 return ctx.OpLoad(ctx.U32[1], pointer);
244}
245
246void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
247 const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
248 ctx.OpStore(pointer, value);
249}
250
241} // namespace Shader::Backend::SPIRV 251} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..fa2fc9ab4
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,175 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6
7namespace Shader::Backend::SPIRV {
8namespace {
9Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
10 const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
11 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
12 return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
13}
14
15Id Word(EmitContext& ctx, Id offset) {
16 const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
17 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
18 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
19 return ctx.OpLoad(ctx.U32[1], pointer);
20}
21
22std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
23 const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
24 const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
25 const Id count_id{ctx.Constant(ctx.U32[1], count)};
26 return {bit, count_id};
27}
28} // Anonymous namespace
29
30Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
31 if (ctx.profile.support_explicit_workgroup_layout) {
32 const Id pointer{
33 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
34 return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
35 } else {
36 const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
37 return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
38 }
39}
40
41Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
42 if (ctx.profile.support_explicit_workgroup_layout) {
43 const Id pointer{
44 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
45 return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
46 } else {
47 const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
48 return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
49 }
50}
51
52Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
53 if (ctx.profile.support_explicit_workgroup_layout) {
54 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
55 return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
56 } else {
57 const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
58 return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
59 }
60}
61
62Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
63 if (ctx.profile.support_explicit_workgroup_layout) {
64 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
65 return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
66 } else {
67 const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
68 return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
69 }
70}
71
72Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
73 if (ctx.profile.support_explicit_workgroup_layout) {
74 const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
75 return ctx.OpLoad(ctx.U32[1], pointer);
76 } else {
77 return Word(ctx, offset);
78 }
79}
80
81Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
82 if (ctx.profile.support_explicit_workgroup_layout) {
83 const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
84 return ctx.OpLoad(ctx.U32[2], pointer);
85 } else {
86 const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
87 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
88 const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
89 const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
90 const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
91 return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
92 ctx.OpLoad(ctx.U32[1], rhs_pointer));
93 }
94}
95
96Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
97 if (ctx.profile.support_explicit_workgroup_layout) {
98 const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
99 return ctx.OpLoad(ctx.U32[4], pointer);
100 }
101 const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
102 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
103 std::array<Id, 4> values{};
104 for (u32 i = 0; i < 4; ++i) {
105 const Id index{i == 0 ? base_index
106 : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
107 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
108 values[i] = ctx.OpLoad(ctx.U32[1], pointer);
109 }
110 return ctx.OpCompositeConstruct(ctx.U32[4], values);
111}
112
113void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
114 if (ctx.profile.support_explicit_workgroup_layout) {
115 const Id pointer{
116 ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
117 ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
118 } else {
119 ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
120 }
121}
122
123void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
124 if (ctx.profile.support_explicit_workgroup_layout) {
125 const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
126 ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
127 } else {
128 ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
129 }
130}
131
132void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
133 Id pointer{};
134 if (ctx.profile.support_explicit_workgroup_layout) {
135 pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
136 } else {
137 const Id shift{ctx.Constant(ctx.U32[1], 2U)};
138 const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
139 pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
140 }
141 ctx.OpStore(pointer, value);
142}
143
144void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
145 if (ctx.profile.support_explicit_workgroup_layout) {
146 const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
147 ctx.OpStore(pointer, value);
148 return;
149 }
150 const Id shift{ctx.Constant(ctx.U32[1], 2U)};
151 const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
152 const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))};
153 const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
154 const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
155 ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
156 ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
157}
158
159void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
160 if (ctx.profile.support_explicit_workgroup_layout) {
161 const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
162 ctx.OpStore(pointer, value);
163 return;
164 }
165 const Id shift{ctx.Constant(ctx.U32[1], 2U)};
166 const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
167 for (u32 i = 0; i < 4; ++i) {
168 const Id index{i == 0 ? base_index
169 : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
170 const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
171 ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
172 }
173}
174
175} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0c62c1c54..9415d02f6 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -19,6 +19,10 @@ public:
19 19
20 [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; 20 [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
21 21
22 [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
23
24 [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
25
22 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; 26 [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
23 27
24 [[nodiscard]] const ProgramHeader& SPH() const noexcept { 28 [[nodiscard]] const ProgramHeader& SPH() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6d41442ee..d6a1d8ec2 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
355 Inst(Opcode::WriteGlobal128, address, vector); 355 Inst(Opcode::WriteGlobal128, address, vector);
356} 356}
357 357
358U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
359 return Inst<U32>(Opcode::LoadLocal, word_offset);
360}
361
362void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
363 Inst(Opcode::WriteLocal, word_offset, value);
364}
365
366Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
367 switch (bit_size) {
368 case 8:
369 return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
370 case 16:
371 return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
372 case 32:
373 return Inst(Opcode::LoadSharedU32, offset);
374 case 64:
375 return Inst(Opcode::LoadSharedU64, offset);
376 case 128:
377 return Inst(Opcode::LoadSharedU128, offset);
378 }
379 throw InvalidArgument("Invalid bit size {}", bit_size);
380}
381
382void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
383 switch (bit_size) {
384 case 8:
385 Inst(Opcode::WriteSharedU8, offset, value);
386 break;
387 case 16:
388 Inst(Opcode::WriteSharedU16, offset, value);
389 break;
390 case 32:
391 Inst(Opcode::WriteSharedU32, offset, value);
392 break;
393 case 64:
394 Inst(Opcode::WriteSharedU64, offset, value);
395 break;
396 case 128:
397 Inst(Opcode::WriteSharedU128, offset, value);
398 break;
399 default:
400 throw InvalidArgument("Invalid bit size {}", bit_size);
401 }
402}
403
358U1 IREmitter::GetZeroFromOp(const Value& op) { 404U1 IREmitter::GetZeroFromOp(const Value& op) {
359 return Inst<U1>(Opcode::GetZeroFromOp, op); 405 return Inst<U1>(Opcode::GetZeroFromOp, op);
360} 406}
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 8d50aa607..842c2bdaf 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -99,6 +99,12 @@ public:
99 void WriteGlobal64(const U64& address, const IR::Value& vector); 99 void WriteGlobal64(const U64& address, const IR::Value& vector);
100 void WriteGlobal128(const U64& address, const IR::Value& vector); 100 void WriteGlobal128(const U64& address, const IR::Value& vector);
101 101
102 [[nodiscard]] U32 LoadLocal(const U32& word_offset);
103 void WriteLocal(const U32& word_offset, const U32& value);
104
105 [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
106 void WriteShared(int bit_size, const U32& offset, const Value& value);
107
102 [[nodiscard]] U1 GetZeroFromOp(const Value& op); 108 [[nodiscard]] U1 GetZeroFromOp(const Value& op);
103 [[nodiscard]] U1 GetSignFromOp(const Value& op); 109 [[nodiscard]] U1 GetSignFromOp(const Value& op);
104 [[nodiscard]] U1 GetCarryFromOp(const Value& op); 110 [[nodiscard]] U1 GetCarryFromOp(const Value& op);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index be8eb4d4c..52a5e5034 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept {
76 case Opcode::WriteStorage32: 76 case Opcode::WriteStorage32:
77 case Opcode::WriteStorage64: 77 case Opcode::WriteStorage64:
78 case Opcode::WriteStorage128: 78 case Opcode::WriteStorage128:
79 case Opcode::WriteLocal:
80 case Opcode::WriteSharedU8:
81 case Opcode::WriteSharedU16:
82 case Opcode::WriteSharedU32:
83 case Opcode::WriteSharedU64:
84 case Opcode::WriteSharedU128:
79 return true; 85 return true;
80 default: 86 default:
81 return false; 87 return false;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 5d7462d76..c75658328 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32,
89OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) 89OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
90OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) 90OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
91 91
92// Local memory operations
93OPCODE(LoadLocal, U32, U32, )
94OPCODE(WriteLocal, Void, U32, U32, )
95
96// Shared memory operations
97OPCODE(LoadSharedU8, U32, U32, )
98OPCODE(LoadSharedS8, U32, U32, )
99OPCODE(LoadSharedU16, U32, U32, )
100OPCODE(LoadSharedS16, U32, U32, )
101OPCODE(LoadSharedU32, U32, U32, )
102OPCODE(LoadSharedU64, U32x2, U32, )
103OPCODE(LoadSharedU128, U32x4, U32, )
104OPCODE(WriteSharedU8, Void, U32, U32, )
105OPCODE(WriteSharedU16, Void, U32, U32, )
106OPCODE(WriteSharedU32, Void, U32, U32, )
107OPCODE(WriteSharedU64, Void, U32, U32x2, )
108OPCODE(WriteSharedU128, Void, U32, U32x4, )
109
92// Vector utility 110// Vector utility
93OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) 111OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
94OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) 112OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index 0162e919c..3a37b3ab9 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -21,6 +21,8 @@ struct Program {
21 Info info; 21 Info info;
22 Stage stage{}; 22 Stage stage{};
23 std::array<u32, 3> workgroup_size{}; 23 std::array<u32, 3> workgroup_size{};
24 u32 local_memory_size{};
25 u32 shared_memory_size{};
24}; 26};
25 27
26[[nodiscard]] std::string DumpProgram(const Program& program); 28[[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index a914a91f4..7b08f11b0 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
67 program.blocks = VisitAST(inst_pool, block_pool, env, cfg); 67 program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
68 program.post_order_blocks = PostOrder(program.blocks); 68 program.post_order_blocks = PostOrder(program.blocks);
69 program.stage = env.ShaderStage(); 69 program.stage = env.ShaderStage();
70 program.local_memory_size = env.LocalMemorySize();
70 if (program.stage == Stage::Compute) { 71 if (program.stage == Stage::Compute) {
71 program.workgroup_size = env.WorkgroupSize(); 72 program.workgroup_size = env.WorkgroupSize();
73 program.shared_memory_size = env.SharedMemorySize();
72 } 74 }
73 RemoveUnreachableBlocks(program); 75 RemoveUnreachableBlocks(program);
74 76
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..68963c8ea
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,197 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Size : u64 {
12 U8,
13 S8,
14 U16,
15 S16,
16 B32,
17 B64,
18 B128,
19};
20
21IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
22 union {
23 u64 raw;
24 BitField<8, 8, IR::Reg> offset_reg;
25 BitField<20, 24, u64> absolute_offset;
26 BitField<20, 24, s64> relative_offset;
27 } const encoding{insn};
28
29 if (encoding.offset_reg == IR::Reg::RZ) {
30 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
31 } else {
32 const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
33 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
34 }
35}
36
37std::pair<int, bool> GetSize(u64 insn) {
38 union {
39 u64 raw;
40 BitField<48, 3, Size> size;
41 } const encoding{insn};
42
43 const Size nnn = encoding.size;
44 switch (encoding.size) {
45 case Size::U8:
46 return {8, false};
47 case Size::S8:
48 return {8, true};
49 case Size::U16:
50 return {16, false};
51 case Size::S16:
52 return {16, true};
53 case Size::B32:
54 return {32, false};
55 case Size::B64:
56 return {64, false};
57 case Size::B128:
58 return {128, false};
59 default:
60 throw NotImplementedException("Invalid size {}", encoding.size.Value());
61 }
62}
63
64IR::Reg Reg(u64 insn) {
65 union {
66 u64 raw;
67 BitField<0, 8, IR::Reg> reg;
68 } const encoding{insn};
69
70 return encoding.reg;
71}
72
73IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
74 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
75}
76
77IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
78 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
79}
80} // Anonymous namespace
81
// LDL: load from thread-local memory. Local memory is addressed here in
// 32-bit words (each LoadLocal reads one word), so sub-word accesses are
// extracted from the containing word with bitfield operations.
void TranslatorVisitor::LDL(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    // Word index = byte offset / 4.
    // NOTE(review): arithmetic shift assumes the offset is non-negative — confirm.
    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};

    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    switch (bit_size) {
    case 8: {
        // Extract the addressed byte; S8 sign-extends via is_signed.
        const IR::U32 bit{ByteOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
        break;
    }
    case 16: {
        // Extract the addressed 16-bit half; S16 sign-extends via is_signed.
        const IR::U32 bit{ShortOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
        break;
    }
    case 32:
    case 64:
    case 128:
        // Wide loads fill one destination register per 32-bit word; the
        // destination register must be aligned to the number of words loaded.
        if (!IR::IsAligned(dest, bit_size / 32)) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        X(dest, ir.LoadLocal(word_offset));
        for (int i = 1; i < bit_size / 32; ++i) {
            X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
        }
        break;
    }
}
112
113void TranslatorVisitor::LDS(u64 insn) {
114 const IR::U32 offset{Offset(*this, insn)};
115 const IR::Reg dest{Reg(insn)};
116 const auto [bit_size, is_signed]{GetSize(insn)};
117 const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
118 switch (bit_size) {
119 case 8:
120 case 16:
121 case 32:
122 X(dest, IR::U32{value});
123 break;
124 case 64:
125 case 128:
126 if (!IR::IsAligned(dest, bit_size / 32)) {
127 throw NotImplementedException("Unaligned destination register {}", dest);
128 }
129 for (int element = 0; element < bit_size / 32; ++element) {
130 X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
131 }
132 break;
133 }
134}
135
// STL: store to thread-local memory. Sub-word stores are emitted as a
// read-modify-write of the containing 32-bit word, so the statement order
// (LoadLocal -> BitFieldInsert -> WriteLocal) is significant.
void TranslatorVisitor::STL(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    // Word index = byte offset / 4.
    // NOTE(review): arithmetic shift assumes the offset is non-negative — confirm.
    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};

    const IR::Reg reg{Reg(insn)};
    const IR::U32 src{X(reg)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8: {
        // Merge the low byte of src into the addressed byte of the word.
        const IR::U32 bit{ByteOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 16: {
        // Merge the low half of src into the addressed half of the word.
        const IR::U32 bit{ShortOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 32:
    case 64:
    case 128:
        // Wide stores write one 32-bit word per consecutive source register.
        if (!IR::IsAligned(reg, bit_size / 32)) {
            // NOTE(review): unlike LDL/LDS/STS this message omits the register
            // placeholder — consider "{}" for consistency.
            throw NotImplementedException("Unaligned source register");
        }
        ir.WriteLocal(word_offset, src);
        for (int i = 1; i < bit_size / 32; ++i) {
            ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
        }
        break;
    }
}
169
170void TranslatorVisitor::STS(u64 insn) {
171 const IR::U32 offset{Offset(*this, insn)};
172 const IR::Reg reg{Reg(insn)};
173 const int bit_size{GetSize(insn).first};
174 switch (bit_size) {
175 case 8:
176 case 16:
177 case 32:
178 ir.WriteShared(bit_size, offset, X(reg));
179 break;
180 case 64:
181 if (!IR::IsAligned(reg, 2)) {
182 throw NotImplementedException("Unaligned source register {}", reg);
183 }
184 ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
185 break;
186 case 128: {
187 if (!IR::IsAligned(reg, 2)) {
188 throw NotImplementedException("Unaligned source register {}", reg);
189 }
190 const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
191 ir.WriteShared(128, offset, vector);
192 break;
193 }
194 }
195}
196
197} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 409216640..b62d8ee2a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) {
193 ThrowNotImplemented(Opcode::LD); 193 ThrowNotImplemented(Opcode::LD);
194} 194}
195 195
196void TranslatorVisitor::LDL(u64) {
197 ThrowNotImplemented(Opcode::LDL);
198}
199
200void TranslatorVisitor::LDS(u64) {
201 ThrowNotImplemented(Opcode::LDS);
202}
203
204void TranslatorVisitor::LEPC(u64) { 196void TranslatorVisitor::LEPC(u64) {
205 ThrowNotImplemented(Opcode::LEPC); 197 ThrowNotImplemented(Opcode::LEPC);
206} 198}
@@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) {
309 ThrowNotImplemented(Opcode::ST); 301 ThrowNotImplemented(Opcode::ST);
310} 302}
311 303
312void TranslatorVisitor::STL(u64) {
313 ThrowNotImplemented(Opcode::STL);
314}
315
316void TranslatorVisitor::STP(u64) { 304void TranslatorVisitor::STP(u64) {
317 ThrowNotImplemented(Opcode::STP); 305 ThrowNotImplemented(Opcode::STP);
318} 306}
319 307
320void TranslatorVisitor::STS(u64) {
321 ThrowNotImplemented(Opcode::STS);
322}
323
324void TranslatorVisitor::SUATOM_cas(u64) { 308void TranslatorVisitor::SUATOM_cas(u64) {
325 ThrowNotImplemented(Opcode::SUATOM_cas); 309 ThrowNotImplemented(Opcode::SUATOM_cas);
326} 310}
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 60be67228..c932c307b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
200 case IR::Opcode::LoadStorageS8: 200 case IR::Opcode::LoadStorageS8:
201 case IR::Opcode::WriteStorageU8: 201 case IR::Opcode::WriteStorageU8:
202 case IR::Opcode::WriteStorageS8: 202 case IR::Opcode::WriteStorageS8:
203 case IR::Opcode::LoadSharedU8:
204 case IR::Opcode::LoadSharedS8:
205 case IR::Opcode::WriteSharedU8:
203 case IR::Opcode::SelectU8: 206 case IR::Opcode::SelectU8:
204 case IR::Opcode::ConvertF16S8: 207 case IR::Opcode::ConvertF16S8:
205 case IR::Opcode::ConvertF16U8: 208 case IR::Opcode::ConvertF16U8:
@@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
224 case IR::Opcode::LoadStorageS16: 227 case IR::Opcode::LoadStorageS16:
225 case IR::Opcode::WriteStorageU16: 228 case IR::Opcode::WriteStorageU16:
226 case IR::Opcode::WriteStorageS16: 229 case IR::Opcode::WriteStorageS16:
230 case IR::Opcode::LoadSharedU16:
231 case IR::Opcode::LoadSharedS16:
232 case IR::Opcode::WriteSharedU16:
227 case IR::Opcode::SelectU16: 233 case IR::Opcode::SelectU16:
228 case IR::Opcode::BitCastU16F16: 234 case IR::Opcode::BitCastU16F16:
229 case IR::Opcode::BitCastF16U16: 235 case IR::Opcode::BitCastF16U16:
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index e26047751..0276fc23b 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -18,6 +18,8 @@ enum class AttributeType : u8 {
18}; 18};
19 19
20struct Profile { 20struct Profile {
21 u32 supported_spirv{0x00010000};
22
21 bool unified_descriptor_binding{}; 23 bool unified_descriptor_binding{};
22 bool support_vertex_instance_id{}; 24 bool support_vertex_instance_id{};
23 bool support_float_controls{}; 25 bool support_float_controls{};
@@ -30,6 +32,7 @@ struct Profile {
30 bool support_fp16_signed_zero_nan_preserve{}; 32 bool support_fp16_signed_zero_nan_preserve{};
31 bool support_fp32_signed_zero_nan_preserve{}; 33 bool support_fp32_signed_zero_nan_preserve{};
32 bool support_fp64_signed_zero_nan_preserve{}; 34 bool support_fp64_signed_zero_nan_preserve{};
35 bool support_explicit_workgroup_layout{};
33 bool support_vote{}; 36 bool support_vote{};
34 bool warp_size_potentially_larger_than_guest{}; 37 bool warp_size_potentially_larger_than_guest{};
35 38
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 69dd945b2..0d6a32bfd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -114,10 +114,12 @@ public:
114 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); 114 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size);
115 115
116 const u64 num_texture_types{static_cast<u64>(texture_types.size())}; 116 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
117 const u32 local_memory_size{LocalMemorySize()};
117 const u32 texture_bound{TextureBoundBuffer()}; 118 const u32 texture_bound{TextureBoundBuffer()};
118 119
119 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) 120 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
120 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) 121 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
122 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
121 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) 123 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
122 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) 124 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
123 .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest)) 125 .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest))
@@ -132,7 +134,10 @@ public:
132 file.flush(); 134 file.flush();
133 if (stage == Shader::Stage::Compute) { 135 if (stage == Shader::Stage::Compute) {
134 const std::array<u32, 3> workgroup_size{WorkgroupSize()}; 136 const std::array<u32, 3> workgroup_size{WorkgroupSize()};
135 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)); 137 const u32 shared_memory_size{SharedMemorySize()};
138 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
139 .write(reinterpret_cast<const char*>(&shared_memory_size),
140 sizeof(shared_memory_size));
136 } else { 141 } else {
137 file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); 142 file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
138 } 143 }
@@ -278,6 +283,16 @@ public:
278 return maxwell3d->regs.tex_cb_index; 283 return maxwell3d->regs.tex_cb_index;
279 } 284 }
280 285
/// Local memory size for a graphics stage, read from the shader program header.
u32 LocalMemorySize() const override {
    const u64 size{sph.LocalMemorySize()};
    // The SPH yields a 64-bit value but the environment interface is 32-bit;
    // guard against silent truncation.
    ASSERT(size <= std::numeric_limits<u32>::max());
    return static_cast<u32>(size);
}
291
/// Shared memory only exists for compute shaders; querying it from a
/// graphics stage is a caller bug.
u32 SharedMemorySize() const override {
    throw Shader::LogicError("Requesting shared memory size in graphics stage");
}
295
281 std::array<u32, 3> WorkgroupSize() const override { 296 std::array<u32, 3> WorkgroupSize() const override {
282 throw Shader::LogicError("Requesting workgroup size in a graphics stage"); 297 throw Shader::LogicError("Requesting workgroup size in a graphics stage");
283 } 298 }
@@ -313,6 +328,16 @@ public:
313 return kepler_compute->regs.tex_cb_index; 328 return kepler_compute->regs.tex_cb_index;
314 } 329 }
315 330
/// Local memory size for a compute shader, from the kernel launch QMD.
/// NOTE(review): assumes qmd.local_pos_alloc is the byte size the shader
/// recompiler expects — confirm against the QMD definition.
u32 LocalMemorySize() const override {
    const auto& qmd{kepler_compute->launch_description};
    return qmd.local_pos_alloc;
}
335
/// Shared memory size for a compute shader, from the kernel launch QMD.
/// NOTE(review): assumes qmd.shared_alloc is expressed in bytes — confirm
/// against the QMD definition.
u32 SharedMemorySize() const override {
    const auto& qmd{kepler_compute->launch_description};
    return qmd.shared_alloc;
}
340
316 std::array<u32, 3> WorkgroupSize() const override { 341 std::array<u32, 3> WorkgroupSize() const override {
317 const auto& qmd{kepler_compute->launch_description}; 342 const auto& qmd{kepler_compute->launch_description};
318 return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; 343 return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
@@ -366,6 +391,7 @@ public:
366 u64 num_texture_types{}; 391 u64 num_texture_types{};
367 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) 392 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
368 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) 393 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
394 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
369 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) 395 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
370 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) 396 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
371 .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) 397 .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
@@ -381,7 +407,8 @@ public:
381 texture_types.emplace(key, type); 407 texture_types.emplace(key, type);
382 } 408 }
383 if (stage == Shader::Stage::Compute) { 409 if (stage == Shader::Stage::Compute) {
384 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)); 410 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
411 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
385 } else { 412 } else {
386 file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); 413 file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
387 } 414 }
@@ -402,6 +429,14 @@ public:
402 return it->second; 429 return it->second;
403 } 430 }
404 431
/// Local memory size previously deserialized from the pipeline cache file.
u32 LocalMemorySize() const override {
    return local_memory_size;
}
435
/// Shared memory size previously deserialized from the pipeline cache file
/// (only meaningful for compute entries).
u32 SharedMemorySize() const override {
    return shared_memory_size;
}
439
405 u32 TextureBoundBuffer() const override { 440 u32 TextureBoundBuffer() const override {
406 return texture_bound; 441 return texture_bound;
407 } 442 }
@@ -414,6 +449,8 @@ private:
414 std::unique_ptr<u64[]> code; 449 std::unique_ptr<u64[]> code;
415 std::unordered_map<u64, Shader::TextureType> texture_types; 450 std::unordered_map<u64, Shader::TextureType> texture_types;
416 std::array<u32, 3> workgroup_size{}; 451 std::array<u32, 3> workgroup_size{};
452 u32 local_memory_size{};
453 u32 shared_memory_size{};
417 u32 texture_bound{}; 454 u32 texture_bound{};
418 u32 read_lowest{}; 455 u32 read_lowest{};
419 u32 read_highest{}; 456 u32 read_highest{};
@@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
541 const auto& float_control{device.FloatControlProperties()}; 578 const auto& float_control{device.FloatControlProperties()};
542 const VkDriverIdKHR driver_id{device.GetDriverID()}; 579 const VkDriverIdKHR driver_id{device.GetDriverID()};
543 base_profile = Shader::Profile{ 580 base_profile = Shader::Profile{
581 .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
544 .unified_descriptor_binding = true, 582 .unified_descriptor_binding = true,
545 .support_vertex_instance_id = false, 583 .support_vertex_instance_id = false,
546 .support_float_controls = true, 584 .support_float_controls = true,
@@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
558 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, 596 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
559 .support_fp64_signed_zero_nan_preserve = 597 .support_fp64_signed_zero_nan_preserve =
560 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, 598 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
599 .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
561 .support_vote = true, 600 .support_vote = true,
562 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), 601 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
563 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, 602 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
@@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
600 shader = MakeShaderInfo(env, *cpu_shader_addr); 639 shader = MakeShaderInfo(env, *cpu_shader_addr);
601 } 640 }
602 const ComputePipelineCacheKey key{ 641 const ComputePipelineCacheKey key{
603 .unique_hash = shader->unique_hash, 642 .unique_hash{shader->unique_hash},
604 .shared_memory_size = qmd.shared_alloc, 643 .shared_memory_size{qmd.shared_alloc},
605 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, 644 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
606 }; 645 };
607 const auto [pair, is_new]{compute_cache.try_emplace(key)}; 646 const auto [pair, is_new]{compute_cache.try_emplace(key)};
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 009b74f12..c027598ba 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
399 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 399 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
400 } 400 }
401 401
402 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
403 if (khr_workgroup_memory_explicit_layout) {
404 workgroup_layout = {
405 .sType =
406 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
407 .pNext = nullptr,
408 .workgroupMemoryExplicitLayout = VK_TRUE,
409 .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
410 .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
411 .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
412 };
413 SetNext(next, workgroup_layout);
414 }
415
402 if (!ext_depth_range_unrestricted) { 416 if (!ext_depth_range_unrestricted) {
403 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 417 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
404 } 418 }
@@ -662,6 +676,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
662 } 676 }
663 677
664 bool has_khr_shader_float16_int8{}; 678 bool has_khr_shader_float16_int8{};
679 bool has_khr_workgroup_memory_explicit_layout{};
665 bool has_ext_subgroup_size_control{}; 680 bool has_ext_subgroup_size_control{};
666 bool has_ext_transform_feedback{}; 681 bool has_ext_transform_feedback{};
667 bool has_ext_custom_border_color{}; 682 bool has_ext_custom_border_color{};
@@ -682,6 +697,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
682 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); 697 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
683 test(khr_uniform_buffer_standard_layout, 698 test(khr_uniform_buffer_standard_layout,
684 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 699 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
700 test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
685 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 701 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
686 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 702 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
687 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 703 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@@ -694,6 +710,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
694 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 710 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
695 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 711 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
696 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 712 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
713 test(has_khr_workgroup_memory_explicit_layout,
714 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
697 if (Settings::values.renderer_debug) { 715 if (Settings::values.renderer_debug) {
698 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, 716 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
699 true); 717 true);
@@ -787,6 +805,22 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
787 ext_extended_dynamic_state = true; 805 ext_extended_dynamic_state = true;
788 } 806 }
789 } 807 }
808 if (has_khr_workgroup_memory_explicit_layout) {
809 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
810 layout.sType =
811 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
812 layout.pNext = nullptr;
813 features.pNext = &layout;
814 physical.GetFeatures2KHR(features);
815
816 if (layout.workgroupMemoryExplicitLayout &&
817 layout.workgroupMemoryExplicitLayout8BitAccess &&
818 layout.workgroupMemoryExplicitLayout16BitAccess &&
819 layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
820 extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
821 khr_workgroup_memory_explicit_layout = true;
822 }
823 }
790 return extensions; 824 return extensions;
791} 825}
792 826
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index c268a4f8d..ac2311e7e 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -168,11 +168,21 @@ public:
168 return nv_viewport_swizzle; 168 return nv_viewport_swizzle;
169 } 169 }
170 170
171 /// Returns true if the device supports VK_EXT_scalar_block_layout. 171 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
172 bool IsKhrUniformBufferStandardLayoutSupported() const { 172 bool IsKhrUniformBufferStandardLayoutSupported() const {
173 return khr_uniform_buffer_standard_layout; 173 return khr_uniform_buffer_standard_layout;
174 } 174 }
175 175
176 /// Returns true if the device supports VK_KHR_spirv_1_4.
177 bool IsKhrSpirv1_4Supported() const {
178 return khr_spirv_1_4;
179 }
180
181 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
182 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
183 return khr_workgroup_memory_explicit_layout;
184 }
185
176 /// Returns true if the device supports VK_EXT_index_type_uint8. 186 /// Returns true if the device supports VK_EXT_index_type_uint8.
177 bool IsExtIndexTypeUint8Supported() const { 187 bool IsExtIndexTypeUint8Supported() const {
178 return ext_index_type_uint8; 188 return ext_index_type_uint8;
@@ -296,20 +306,22 @@ private:
296 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. 306 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
297 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. 307 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
298 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 308 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
299 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 309 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
300 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 310 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
301 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 311 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
302 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 312 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
303 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 313 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
304 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. 314 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
305 bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. 315 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
306 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 316 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
307 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 317 bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
308 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 318 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
309 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 319 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
310 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 320 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
311 bool has_renderdoc{}; ///< Has RenderDoc attached 321 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
312 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 322 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
323 bool has_renderdoc{}; ///< Has RenderDoc attached
324 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
313 325
314 // Telemetry parameters 326 // Telemetry parameters
315 std::string vendor_name; ///< Device's driver name. 327 std::string vendor_name; ///< Device's driver name.