diff options
Diffstat (limited to 'src')
20 files changed, 730 insertions, 36 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 55b846c84..003cbefb1 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC | |||
| 14 | backend/spirv/emit_spirv_logical.cpp | 14 | backend/spirv/emit_spirv_logical.cpp |
| 15 | backend/spirv/emit_spirv_memory.cpp | 15 | backend/spirv/emit_spirv_memory.cpp |
| 16 | backend/spirv/emit_spirv_select.cpp | 16 | backend/spirv/emit_spirv_select.cpp |
| 17 | backend/spirv/emit_spirv_shared_memory.cpp | ||
| 17 | backend/spirv/emit_spirv_special.cpp | 18 | backend/spirv/emit_spirv_special.cpp |
| 18 | backend/spirv/emit_spirv_undefined.cpp | 19 | backend/spirv/emit_spirv_undefined.cpp |
| 19 | backend/spirv/emit_spirv_warp.cpp | 20 | backend/spirv/emit_spirv_warp.cpp |
| @@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC | |||
| 111 | frontend/maxwell/translate/impl/load_constant.cpp | 112 | frontend/maxwell/translate/impl/load_constant.cpp |
| 112 | frontend/maxwell/translate/impl/load_effective_address.cpp | 113 | frontend/maxwell/translate/impl/load_effective_address.cpp |
| 113 | frontend/maxwell/translate/impl/load_store_attribute.cpp | 114 | frontend/maxwell/translate/impl/load_store_attribute.cpp |
| 115 | frontend/maxwell/translate/impl/load_store_local_shared.cpp | ||
| 114 | frontend/maxwell/translate/impl/load_store_memory.cpp | 116 | frontend/maxwell/translate/impl/load_store_memory.cpp |
| 115 | frontend/maxwell/translate/impl/logic_operation.cpp | 117 | frontend/maxwell/translate/impl/logic_operation.cpp |
| 116 | frontend/maxwell/translate/impl/logic_operation_three_input.cpp | 118 | frontend/maxwell/translate/impl/logic_operation_three_input.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a8ca33c1d..96d0e9b4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/div_ceil.h" | ||
| 12 | #include "shader_recompiler/backend/spirv/emit_context.h" | 13 | #include "shader_recompiler/backend/spirv/emit_context.h" |
| 13 | 14 | ||
| 14 | namespace Shader::Backend::SPIRV { | 15 | namespace Shader::Backend::SPIRV { |
| @@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie | |||
| 96 | } | 97 | } |
| 97 | 98 | ||
| 98 | EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) | 99 | EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) |
| 99 | : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} { | 100 | : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { |
| 100 | AddCapability(spv::Capability::Shader); | 101 | AddCapability(spv::Capability::Shader); |
| 101 | DefineCommonTypes(program.info); | 102 | DefineCommonTypes(program.info); |
| 102 | DefineCommonConstants(); | 103 | DefineCommonConstants(); |
| 103 | DefineInterfaces(program.info); | 104 | DefineInterfaces(program.info); |
| 105 | DefineLocalMemory(program); | ||
| 106 | DefineSharedMemory(program); | ||
| 104 | DefineConstantBuffers(program.info, binding); | 107 | DefineConstantBuffers(program.info, binding); |
| 105 | DefineStorageBuffers(program.info, binding); | 108 | DefineStorageBuffers(program.info, binding); |
| 106 | DefineTextures(program.info, binding); | 109 | DefineTextures(program.info, binding); |
| @@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) { | |||
| 143 | F32.Define(*this, TypeFloat(32), "f32"); | 146 | F32.Define(*this, TypeFloat(32), "f32"); |
| 144 | U32.Define(*this, TypeInt(32, false), "u32"); | 147 | U32.Define(*this, TypeInt(32, false), "u32"); |
| 145 | 148 | ||
| 149 | private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); | ||
| 150 | |||
| 146 | input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); | 151 | input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); |
| 147 | input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); | 152 | input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); |
| 148 | input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); | 153 | input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); |
| @@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) { | |||
| 184 | DefineOutputs(info); | 189 | DefineOutputs(info); |
| 185 | } | 190 | } |
| 186 | 191 | ||
| 192 | void EmitContext::DefineLocalMemory(const IR::Program& program) { | ||
| 193 | if (program.local_memory_size == 0) { | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; | ||
| 197 | const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; | ||
| 198 | const Id pointer{TypePointer(spv::StorageClass::Private, type)}; | ||
| 199 | local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); | ||
| 200 | if (profile.supported_spirv >= 0x00010400) { | ||
| 201 | interfaces.push_back(local_memory); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||
| 206 | if (program.shared_memory_size == 0) { | ||
| 207 | return; | ||
| 208 | } | ||
| 209 | const auto make{[&](Id element_type, u32 element_size) { | ||
| 210 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; | ||
| 211 | const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; | ||
| 212 | Decorate(array_type, spv::Decoration::ArrayStride, element_size); | ||
| 213 | |||
| 214 | const Id struct_type{TypeStruct(array_type)}; | ||
| 215 | MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); | ||
| 216 | Decorate(struct_type, spv::Decoration::Block); | ||
| 217 | |||
| 218 | const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; | ||
| 219 | const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; | ||
| 220 | const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; | ||
| 221 | Decorate(variable, spv::Decoration::Aliased); | ||
| 222 | interfaces.push_back(variable); | ||
| 223 | |||
| 224 | return std::make_pair(variable, element_pointer); | ||
| 225 | }}; | ||
| 226 | if (profile.support_explicit_workgroup_layout) { | ||
| 227 | AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); | ||
| 228 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); | ||
| 229 | if (program.info.uses_int8) { | ||
| 230 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); | ||
| 231 | std::tie(shared_memory_u8, shared_u8) = make(U8, 1); | ||
| 232 | } | ||
| 233 | if (program.info.uses_int16) { | ||
| 234 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); | ||
| 235 | std::tie(shared_memory_u16, shared_u16) = make(U16, 2); | ||
| 236 | } | ||
| 237 | std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); | ||
| 238 | std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); | ||
| 239 | std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); | ||
| 240 | } | ||
| 241 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; | ||
| 242 | const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; | ||
| 243 | const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; | ||
| 244 | shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); | ||
| 245 | shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); | ||
| 246 | interfaces.push_back(shared_memory_u32); | ||
| 247 | |||
| 248 | const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; | ||
| 249 | const auto make_function{[&](u32 mask, u32 size) { | ||
| 250 | const Id loop_header{OpLabel()}; | ||
| 251 | const Id continue_block{OpLabel()}; | ||
| 252 | const Id merge_block{OpLabel()}; | ||
| 253 | |||
| 254 | const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 255 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 256 | const Id insert_value{OpFunctionParameter(U32[1])}; | ||
| 257 | AddLabel(); | ||
| 258 | OpBranch(loop_header); | ||
| 259 | |||
| 260 | AddLabel(loop_header); | ||
| 261 | const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; | ||
| 262 | const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; | ||
| 263 | const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; | ||
| 264 | const Id count{Constant(U32[1], size)}; | ||
| 265 | OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 266 | OpBranch(continue_block); | ||
| 267 | |||
| 268 | AddLabel(continue_block); | ||
| 269 | const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; | ||
| 270 | const Id old_value{OpLoad(U32[1], word_pointer)}; | ||
| 271 | const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; | ||
| 272 | const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), | ||
| 273 | u32_zero_value, u32_zero_value, new_value, | ||
| 274 | old_value)}; | ||
| 275 | const Id success{OpIEqual(U1, atomic_res, old_value)}; | ||
| 276 | OpBranchConditional(success, merge_block, loop_header); | ||
| 277 | |||
| 278 | AddLabel(merge_block); | ||
| 279 | OpReturn(); | ||
| 280 | OpFunctionEnd(); | ||
| 281 | return func; | ||
| 282 | }}; | ||
| 283 | if (program.info.uses_int8) { | ||
| 284 | shared_store_u8_func = make_function(24, 8); | ||
| 285 | } | ||
| 286 | if (program.info.uses_int16) { | ||
| 287 | shared_store_u16_func = make_function(16, 16); | ||
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 187 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | 291 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { |
| 188 | if (info.constant_buffer_descriptors.empty()) { | 292 | if (info.constant_buffer_descriptors.empty()) { |
| 189 | return; | 293 | return; |
| @@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | |||
| 234 | Decorate(id, spv::Decoration::Binding, binding); | 338 | Decorate(id, spv::Decoration::Binding, binding); |
| 235 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | 339 | Decorate(id, spv::Decoration::DescriptorSet, 0U); |
| 236 | Name(id, fmt::format("ssbo{}", index)); | 340 | Name(id, fmt::format("ssbo{}", index)); |
| 341 | if (profile.supported_spirv >= 0x00010400) { | ||
| 342 | interfaces.push_back(id); | ||
| 343 | } | ||
| 237 | std::fill_n(ssbos.data() + index, desc.count, id); | 344 | std::fill_n(ssbos.data() + index, desc.count, id); |
| 238 | index += desc.count; | 345 | index += desc.count; |
| 239 | binding += desc.count; | 346 | binding += desc.count; |
| @@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { | |||
| 261 | .image_type{image_type}, | 368 | .image_type{image_type}, |
| 262 | }); | 369 | }); |
| 263 | } | 370 | } |
| 371 | if (profile.supported_spirv >= 0x00010400) { | ||
| 372 | interfaces.push_back(id); | ||
| 373 | } | ||
| 264 | binding += desc.count; | 374 | binding += desc.count; |
| 265 | } | 375 | } |
| 266 | } | 376 | } |
| @@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: | |||
| 363 | for (size_t i = 0; i < desc.count; ++i) { | 473 | for (size_t i = 0; i < desc.count; ++i) { |
| 364 | cbufs[desc.index + i].*member_type = id; | 474 | cbufs[desc.index + i].*member_type = id; |
| 365 | } | 475 | } |
| 476 | if (profile.supported_spirv >= 0x00010400) { | ||
| 477 | interfaces.push_back(id); | ||
| 478 | } | ||
| 366 | binding += desc.count; | 479 | binding += desc.count; |
| 367 | } | 480 | } |
| 368 | } | 481 | } |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 01b7b665d..1a4e8221a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -73,6 +73,14 @@ public: | |||
| 73 | 73 | ||
| 74 | UniformDefinitions uniform_types; | 74 | UniformDefinitions uniform_types; |
| 75 | 75 | ||
| 76 | Id private_u32{}; | ||
| 77 | |||
| 78 | Id shared_u8{}; | ||
| 79 | Id shared_u16{}; | ||
| 80 | Id shared_u32{}; | ||
| 81 | Id shared_u32x2{}; | ||
| 82 | Id shared_u32x4{}; | ||
| 83 | |||
| 76 | Id input_f32{}; | 84 | Id input_f32{}; |
| 77 | Id input_u32{}; | 85 | Id input_u32{}; |
| 78 | Id input_s32{}; | 86 | Id input_s32{}; |
| @@ -96,6 +104,17 @@ public: | |||
| 96 | Id base_vertex{}; | 104 | Id base_vertex{}; |
| 97 | Id front_face{}; | 105 | Id front_face{}; |
| 98 | 106 | ||
| 107 | Id local_memory{}; | ||
| 108 | |||
| 109 | Id shared_memory_u8{}; | ||
| 110 | Id shared_memory_u16{}; | ||
| 111 | Id shared_memory_u32{}; | ||
| 112 | Id shared_memory_u32x2{}; | ||
| 113 | Id shared_memory_u32x4{}; | ||
| 114 | |||
| 115 | Id shared_store_u8_func{}; | ||
| 116 | Id shared_store_u16_func{}; | ||
| 117 | |||
| 99 | Id input_position{}; | 118 | Id input_position{}; |
| 100 | std::array<Id, 32> input_generics{}; | 119 | std::array<Id, 32> input_generics{}; |
| 101 | 120 | ||
| @@ -111,6 +130,8 @@ private: | |||
| 111 | void DefineCommonTypes(const Info& info); | 130 | void DefineCommonTypes(const Info& info); |
| 112 | void DefineCommonConstants(); | 131 | void DefineCommonConstants(); |
| 113 | void DefineInterfaces(const Info& info); | 132 | void DefineInterfaces(const Info& info); |
| 133 | void DefineLocalMemory(const IR::Program& program); | ||
| 134 | void DefineSharedMemory(const IR::Program& program); | ||
| 114 | void DefineConstantBuffers(const Info& info, u32& binding); | 135 | void DefineConstantBuffers(const Info& info, u32& binding); |
| 115 | void DefineStorageBuffers(const Info& info, u32& binding); | 136 | void DefineStorageBuffers(const Info& info, u32& binding); |
| 116 | void DefineTextures(const Info& info, u32& binding); | 137 | void DefineTextures(const Info& info, u32& binding); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 837f0e858..4f62af959 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx); | |||
| 58 | void EmitSetOFlag(EmitContext& ctx); | 58 | void EmitSetOFlag(EmitContext& ctx); |
| 59 | Id EmitWorkgroupId(EmitContext& ctx); | 59 | Id EmitWorkgroupId(EmitContext& ctx); |
| 60 | Id EmitLocalInvocationId(EmitContext& ctx); | 60 | Id EmitLocalInvocationId(EmitContext& ctx); |
| 61 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset); | ||
| 62 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); | ||
| 61 | Id EmitUndefU1(EmitContext& ctx); | 63 | Id EmitUndefU1(EmitContext& ctx); |
| 62 | Id EmitUndefU8(EmitContext& ctx); | 64 | Id EmitUndefU8(EmitContext& ctx); |
| 63 | Id EmitUndefU16(EmitContext& ctx); | 65 | Id EmitUndefU16(EmitContext& ctx); |
| @@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va | |||
| 94 | Id value); | 96 | Id value); |
| 95 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | 97 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, |
| 96 | Id value); | 98 | Id value); |
| 99 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset); | ||
| 100 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset); | ||
| 101 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset); | ||
| 102 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset); | ||
| 103 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset); | ||
| 104 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset); | ||
| 105 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset); | ||
| 106 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); | ||
| 107 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); | ||
| 108 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); | ||
| 109 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); | ||
| 110 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); | ||
| 97 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | 111 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); |
| 98 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | 112 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); |
| 99 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | 113 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4cbc2aec1..52dcef8a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) { | |||
| 238 | return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); | 238 | return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { | ||
| 242 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 243 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 244 | } | ||
| 245 | |||
| 246 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { | ||
| 247 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 248 | ctx.OpStore(pointer, value); | ||
| 249 | } | ||
| 250 | |||
| 241 | } // namespace Shader::Backend::SPIRV | 251 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..fa2fc9ab4 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp | |||
| @@ -0,0 +1,175 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { | ||
| 10 | const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; | ||
| 11 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 12 | return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); | ||
| 13 | } | ||
| 14 | |||
| 15 | Id Word(EmitContext& ctx, Id offset) { | ||
| 16 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 17 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 18 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 19 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 20 | } | ||
| 21 | |||
| 22 | std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { | ||
| 23 | const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))}; | ||
| 24 | const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))}; | ||
| 25 | const Id count_id{ctx.Constant(ctx.U32[1], count)}; | ||
| 26 | return {bit, count_id}; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { | ||
| 31 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 32 | const Id pointer{ | ||
| 33 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 34 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 35 | } else { | ||
| 36 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 37 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { | ||
| 42 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 43 | const Id pointer{ | ||
| 44 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 45 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 46 | } else { | ||
| 47 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 48 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { | ||
| 53 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 54 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 55 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 56 | } else { | ||
| 57 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 58 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { | ||
| 63 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 64 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 65 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 66 | } else { | ||
| 67 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 68 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { | ||
| 73 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 74 | const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; | ||
| 75 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 76 | } else { | ||
| 77 | return Word(ctx, offset); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { | ||
| 82 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 83 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 84 | return ctx.OpLoad(ctx.U32[2], pointer); | ||
| 85 | } else { | ||
| 86 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 87 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 88 | const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))}; | ||
| 89 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; | ||
| 90 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; | ||
| 91 | return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), | ||
| 92 | ctx.OpLoad(ctx.U32[1], rhs_pointer)); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { | ||
| 97 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 98 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 99 | return ctx.OpLoad(ctx.U32[4], pointer); | ||
| 100 | } | ||
| 101 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 102 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 103 | std::array<Id, 4> values{}; | ||
| 104 | for (u32 i = 0; i < 4; ++i) { | ||
| 105 | const Id index{i == 0 ? base_index | ||
| 106 | : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; | ||
| 107 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 108 | values[i] = ctx.OpLoad(ctx.U32[1], pointer); | ||
| 109 | } | ||
| 110 | return ctx.OpCompositeConstruct(ctx.U32[4], values); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { | ||
| 114 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 115 | const Id pointer{ | ||
| 116 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 117 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); | ||
| 118 | } else { | ||
| 119 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { | ||
| 124 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 125 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 126 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); | ||
| 127 | } else { | ||
| 128 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { | ||
| 133 | Id pointer{}; | ||
| 134 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 135 | pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); | ||
| 136 | } else { | ||
| 137 | const Id shift{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 138 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 139 | pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); | ||
| 140 | } | ||
| 141 | ctx.OpStore(pointer, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { | ||
| 145 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 146 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 147 | ctx.OpStore(pointer, value); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | const Id shift{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 151 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 152 | const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; | ||
| 153 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; | ||
| 154 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; | ||
| 155 | ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); | ||
| 156 | ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { | ||
| 160 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 161 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 162 | ctx.OpStore(pointer, value); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | const Id shift{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 166 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 167 | for (u32 i = 0; i < 4; ++i) { | ||
| 168 | const Id index{i == 0 ? base_index | ||
| 169 | : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; | ||
| 170 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 171 | ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0c62c1c54..9415d02f6 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h | |||
| @@ -19,6 +19,10 @@ public: | |||
| 19 | 19 | ||
| 20 | [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; | 20 | [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; |
| 21 | 21 | ||
| 22 | [[nodiscard]] virtual u32 LocalMemorySize() const = 0; | ||
| 23 | |||
| 24 | [[nodiscard]] virtual u32 SharedMemorySize() const = 0; | ||
| 25 | |||
| 22 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | 26 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; |
| 23 | 27 | ||
| 24 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { | 28 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6d41442ee..d6a1d8ec2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { | |||
| 355 | Inst(Opcode::WriteGlobal128, address, vector); | 355 | Inst(Opcode::WriteGlobal128, address, vector); |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | U32 IREmitter::LoadLocal(const IR::U32& word_offset) { | ||
| 359 | return Inst<U32>(Opcode::LoadLocal, word_offset); | ||
| 360 | } | ||
| 361 | |||
| 362 | void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { | ||
| 363 | Inst(Opcode::WriteLocal, word_offset, value); | ||
| 364 | } | ||
| 365 | |||
| 366 | Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { | ||
| 367 | switch (bit_size) { | ||
| 368 | case 8: | ||
| 369 | return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); | ||
| 370 | case 16: | ||
| 371 | return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); | ||
| 372 | case 32: | ||
| 373 | return Inst(Opcode::LoadSharedU32, offset); | ||
| 374 | case 64: | ||
| 375 | return Inst(Opcode::LoadSharedU64, offset); | ||
| 376 | case 128: | ||
| 377 | return Inst(Opcode::LoadSharedU128, offset); | ||
| 378 | } | ||
| 379 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 380 | } | ||
| 381 | |||
| 382 | void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { | ||
| 383 | switch (bit_size) { | ||
| 384 | case 8: | ||
| 385 | Inst(Opcode::WriteSharedU8, offset, value); | ||
| 386 | break; | ||
| 387 | case 16: | ||
| 388 | Inst(Opcode::WriteSharedU16, offset, value); | ||
| 389 | break; | ||
| 390 | case 32: | ||
| 391 | Inst(Opcode::WriteSharedU32, offset, value); | ||
| 392 | break; | ||
| 393 | case 64: | ||
| 394 | Inst(Opcode::WriteSharedU64, offset, value); | ||
| 395 | break; | ||
| 396 | case 128: | ||
| 397 | Inst(Opcode::WriteSharedU128, offset, value); | ||
| 398 | break; | ||
| 399 | default: | ||
| 400 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 358 | U1 IREmitter::GetZeroFromOp(const Value& op) { | 404 | U1 IREmitter::GetZeroFromOp(const Value& op) { |
| 359 | return Inst<U1>(Opcode::GetZeroFromOp, op); | 405 | return Inst<U1>(Opcode::GetZeroFromOp, op); |
| 360 | } | 406 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8d50aa607..842c2bdaf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -99,6 +99,12 @@ public: | |||
| 99 | void WriteGlobal64(const U64& address, const IR::Value& vector); | 99 | void WriteGlobal64(const U64& address, const IR::Value& vector); |
| 100 | void WriteGlobal128(const U64& address, const IR::Value& vector); | 100 | void WriteGlobal128(const U64& address, const IR::Value& vector); |
| 101 | 101 | ||
| 102 | [[nodiscard]] U32 LoadLocal(const U32& word_offset); | ||
| 103 | void WriteLocal(const U32& word_offset, const U32& value); | ||
| 104 | |||
| 105 | [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); | ||
| 106 | void WriteShared(int bit_size, const U32& offset, const Value& value); | ||
| 107 | |||
| 102 | [[nodiscard]] U1 GetZeroFromOp(const Value& op); | 108 | [[nodiscard]] U1 GetZeroFromOp(const Value& op); |
| 103 | [[nodiscard]] U1 GetSignFromOp(const Value& op); | 109 | [[nodiscard]] U1 GetSignFromOp(const Value& op); |
| 104 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | 110 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index be8eb4d4c..52a5e5034 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept { | |||
| 76 | case Opcode::WriteStorage32: | 76 | case Opcode::WriteStorage32: |
| 77 | case Opcode::WriteStorage64: | 77 | case Opcode::WriteStorage64: |
| 78 | case Opcode::WriteStorage128: | 78 | case Opcode::WriteStorage128: |
| 79 | case Opcode::WriteLocal: | ||
| 80 | case Opcode::WriteSharedU8: | ||
| 81 | case Opcode::WriteSharedU16: | ||
| 82 | case Opcode::WriteSharedU32: | ||
| 83 | case Opcode::WriteSharedU64: | ||
| 84 | case Opcode::WriteSharedU128: | ||
| 79 | return true; | 85 | return true; |
| 80 | default: | 86 | default: |
| 81 | return false; | 87 | return false; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5d7462d76..c75658328 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32, | |||
| 89 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) | 89 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) |
| 90 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) | 90 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) |
| 91 | 91 | ||
| 92 | // Local memory operations | ||
| 93 | OPCODE(LoadLocal, U32, U32, ) | ||
| 94 | OPCODE(WriteLocal, Void, U32, U32, ) | ||
| 95 | |||
| 96 | // Shared memory operations | ||
| 97 | OPCODE(LoadSharedU8, U32, U32, ) | ||
| 98 | OPCODE(LoadSharedS8, U32, U32, ) | ||
| 99 | OPCODE(LoadSharedU16, U32, U32, ) | ||
| 100 | OPCODE(LoadSharedS16, U32, U32, ) | ||
| 101 | OPCODE(LoadSharedU32, U32, U32, ) | ||
| 102 | OPCODE(LoadSharedU64, U32x2, U32, ) | ||
| 103 | OPCODE(LoadSharedU128, U32x4, U32, ) | ||
| 104 | OPCODE(WriteSharedU8, Void, U32, U32, ) | ||
| 105 | OPCODE(WriteSharedU16, Void, U32, U32, ) | ||
| 106 | OPCODE(WriteSharedU32, Void, U32, U32, ) | ||
| 107 | OPCODE(WriteSharedU64, Void, U32, U32x2, ) | ||
| 108 | OPCODE(WriteSharedU128, Void, U32, U32x4, ) | ||
| 109 | |||
| 92 | // Vector utility | 110 | // Vector utility |
| 93 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) | 111 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) |
| 94 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) | 112 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) |
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 0162e919c..3a37b3ab9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h | |||
| @@ -21,6 +21,8 @@ struct Program { | |||
| 21 | Info info; | 21 | Info info; |
| 22 | Stage stage{}; | 22 | Stage stage{}; |
| 23 | std::array<u32, 3> workgroup_size{}; | 23 | std::array<u32, 3> workgroup_size{}; |
| 24 | u32 local_memory_size{}; | ||
| 25 | u32 shared_memory_size{}; | ||
| 24 | }; | 26 | }; |
| 25 | 27 | ||
| 26 | [[nodiscard]] std::string DumpProgram(const Program& program); | 28 | [[nodiscard]] std::string DumpProgram(const Program& program); |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 67 | program.blocks = VisitAST(inst_pool, block_pool, env, cfg); | 67 | program.blocks = VisitAST(inst_pool, block_pool, env, cfg); |
| 68 | program.post_order_blocks = PostOrder(program.blocks); | 68 | program.post_order_blocks = PostOrder(program.blocks); |
| 69 | program.stage = env.ShaderStage(); | 69 | program.stage = env.ShaderStage(); |
| 70 | program.local_memory_size = env.LocalMemorySize(); | ||
| 70 | if (program.stage == Stage::Compute) { | 71 | if (program.stage == Stage::Compute) { |
| 71 | program.workgroup_size = env.WorkgroupSize(); | 72 | program.workgroup_size = env.WorkgroupSize(); |
| 73 | program.shared_memory_size = env.SharedMemorySize(); | ||
| 72 | } | 74 | } |
| 73 | RemoveUnreachableBlocks(program); | 75 | RemoveUnreachableBlocks(program); |
| 74 | 76 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..68963c8ea --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | |||
| @@ -0,0 +1,197 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
// Memory access size selector decoded from bits 48-50 of LDL/LDS/STL/STS
// (see GetSize below). U/S variants select zero- vs sign-extension on load.
enum class Size : u64 {
    U8,
    S8,
    U16,
    S16,
    B32,
    B64,
    B128,
};
| 20 | |||
| 21 | IR::U32 Offset(TranslatorVisitor& v, u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 25 | BitField<20, 24, u64> absolute_offset; | ||
| 26 | BitField<20, 24, s64> relative_offset; | ||
| 27 | } const encoding{insn}; | ||
| 28 | |||
| 29 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 30 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); | ||
| 31 | } else { | ||
| 32 | const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; | ||
| 33 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::pair<int, bool> GetSize(u64 insn) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<48, 3, Size> size; | ||
| 41 | } const encoding{insn}; | ||
| 42 | |||
| 43 | const Size nnn = encoding.size; | ||
| 44 | switch (encoding.size) { | ||
| 45 | case Size::U8: | ||
| 46 | return {8, false}; | ||
| 47 | case Size::S8: | ||
| 48 | return {8, true}; | ||
| 49 | case Size::U16: | ||
| 50 | return {16, false}; | ||
| 51 | case Size::S16: | ||
| 52 | return {16, true}; | ||
| 53 | case Size::B32: | ||
| 54 | return {32, false}; | ||
| 55 | case Size::B64: | ||
| 56 | return {64, false}; | ||
| 57 | case Size::B128: | ||
| 58 | return {128, false}; | ||
| 59 | default: | ||
| 60 | throw NotImplementedException("Invalid size {}", encoding.size.Value()); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | IR::Reg Reg(u64 insn) { | ||
| 65 | union { | ||
| 66 | u64 raw; | ||
| 67 | BitField<0, 8, IR::Reg> reg; | ||
| 68 | } const encoding{insn}; | ||
| 69 | |||
| 70 | return encoding.reg; | ||
| 71 | } | ||
| 72 | |||
// Bit position (0, 8, 16 or 24) of the addressed byte within its 32-bit word:
// (offset * 8) masked to the two low byte-select bits.
IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
}
| 76 | |||
// Bit position (0 or 16) of the addressed 16-bit half within its 32-bit word.
IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
}
| 80 | } // Anonymous namespace | ||
| 81 | |||
// LDL: load from thread-local memory into one or more registers.
void TranslatorVisitor::LDL(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    // Local memory is accessed in 32-bit words; convert the byte offset.
    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};

    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    switch (bit_size) {
    case 8: {
        // Sub-word load: fetch the containing word and extract the byte,
        // sign-extending for S8.
        const IR::U32 bit{ByteOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
        break;
    }
    case 16: {
        const IR::U32 bit{ShortOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
        break;
    }
    case 32:
    case 64:
    case 128:
        // Wide loads fill consecutive registers, one word each; the base
        // register must be aligned to the number of registers written.
        if (!IR::IsAligned(dest, bit_size / 32)) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        X(dest, ir.LoadLocal(word_offset));
        for (int i = 1; i < bit_size / 32; ++i) {
            X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
        }
        break;
    }
}
| 112 | |||
// LDS: load from workgroup shared memory into one or more registers.
void TranslatorVisitor::LDS(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    // Shared memory has sized IR opcodes, so extension/width handling is
    // delegated to LoadShared rather than done with bitfield extracts.
    const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
    switch (bit_size) {
    case 8:
    case 16:
    case 32:
        // Result is already a single 32-bit value.
        X(dest, IR::U32{value});
        break;
    case 64:
    case 128:
        // Result is a U32x2/U32x4 vector; scatter it into consecutive
        // registers, which requires an aligned base register.
        if (!IR::IsAligned(dest, bit_size / 32)) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        for (int element = 0; element < bit_size / 32; ++element) {
            X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
        }
        break;
    }
}
| 135 | |||
// STL: store one or more registers to thread-local memory.
void TranslatorVisitor::STL(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    // Local memory is accessed in 32-bit words; convert the byte offset.
    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};

    const IR::Reg reg{Reg(insn)};
    const IR::U32 src{X(reg)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8: {
        // Sub-word store: read-modify-write the containing word, inserting
        // the low byte of the source at the byte's bit position.
        const IR::U32 bit{ByteOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 16: {
        const IR::U32 bit{ShortOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 32:
    case 64:
    case 128:
        // Wide stores write one word per consecutive source register; the
        // base register must be aligned to the number of registers read.
        if (!IR::IsAligned(reg, bit_size / 32)) {
            throw NotImplementedException("Unaligned source register");
        }
        ir.WriteLocal(word_offset, src);
        for (int i = 1; i < bit_size / 32; ++i) {
            ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
        }
        break;
    }
}
| 169 | |||
| 170 | void TranslatorVisitor::STS(u64 insn) { | ||
| 171 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 172 | const IR::Reg reg{Reg(insn)}; | ||
| 173 | const int bit_size{GetSize(insn).first}; | ||
| 174 | switch (bit_size) { | ||
| 175 | case 8: | ||
| 176 | case 16: | ||
| 177 | case 32: | ||
| 178 | ir.WriteShared(bit_size, offset, X(reg)); | ||
| 179 | break; | ||
| 180 | case 64: | ||
| 181 | if (!IR::IsAligned(reg, 2)) { | ||
| 182 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 183 | } | ||
| 184 | ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); | ||
| 185 | break; | ||
| 186 | case 128: { | ||
| 187 | if (!IR::IsAligned(reg, 2)) { | ||
| 188 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 189 | } | ||
| 190 | const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; | ||
| 191 | ir.WriteShared(128, offset, vector); | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { | |||
| 193 | ThrowNotImplemented(Opcode::LD); | 193 | ThrowNotImplemented(Opcode::LD); |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | void TranslatorVisitor::LDL(u64) { | ||
| 197 | ThrowNotImplemented(Opcode::LDL); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::LDS(u64) { | ||
| 201 | ThrowNotImplemented(Opcode::LDS); | ||
| 202 | } | ||
| 203 | |||
| 204 | void TranslatorVisitor::LEPC(u64) { | 196 | void TranslatorVisitor::LEPC(u64) { |
| 205 | ThrowNotImplemented(Opcode::LEPC); | 197 | ThrowNotImplemented(Opcode::LEPC); |
| 206 | } | 198 | } |
| @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { | |||
| 309 | ThrowNotImplemented(Opcode::ST); | 301 | ThrowNotImplemented(Opcode::ST); |
| 310 | } | 302 | } |
| 311 | 303 | ||
| 312 | void TranslatorVisitor::STL(u64) { | ||
| 313 | ThrowNotImplemented(Opcode::STL); | ||
| 314 | } | ||
| 315 | |||
| 316 | void TranslatorVisitor::STP(u64) { | 304 | void TranslatorVisitor::STP(u64) { |
| 317 | ThrowNotImplemented(Opcode::STP); | 305 | ThrowNotImplemented(Opcode::STP); |
| 318 | } | 306 | } |
| 319 | 307 | ||
| 320 | void TranslatorVisitor::STS(u64) { | ||
| 321 | ThrowNotImplemented(Opcode::STS); | ||
| 322 | } | ||
| 323 | |||
| 324 | void TranslatorVisitor::SUATOM_cas(u64) { | 308 | void TranslatorVisitor::SUATOM_cas(u64) { |
| 325 | ThrowNotImplemented(Opcode::SUATOM_cas); | 309 | ThrowNotImplemented(Opcode::SUATOM_cas); |
| 326 | } | 310 | } |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60be67228..c932c307b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 200 | case IR::Opcode::LoadStorageS8: | 200 | case IR::Opcode::LoadStorageS8: |
| 201 | case IR::Opcode::WriteStorageU8: | 201 | case IR::Opcode::WriteStorageU8: |
| 202 | case IR::Opcode::WriteStorageS8: | 202 | case IR::Opcode::WriteStorageS8: |
| 203 | case IR::Opcode::LoadSharedU8: | ||
| 204 | case IR::Opcode::LoadSharedS8: | ||
| 205 | case IR::Opcode::WriteSharedU8: | ||
| 203 | case IR::Opcode::SelectU8: | 206 | case IR::Opcode::SelectU8: |
| 204 | case IR::Opcode::ConvertF16S8: | 207 | case IR::Opcode::ConvertF16S8: |
| 205 | case IR::Opcode::ConvertF16U8: | 208 | case IR::Opcode::ConvertF16U8: |
| @@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 224 | case IR::Opcode::LoadStorageS16: | 227 | case IR::Opcode::LoadStorageS16: |
| 225 | case IR::Opcode::WriteStorageU16: | 228 | case IR::Opcode::WriteStorageU16: |
| 226 | case IR::Opcode::WriteStorageS16: | 229 | case IR::Opcode::WriteStorageS16: |
| 230 | case IR::Opcode::LoadSharedU16: | ||
| 231 | case IR::Opcode::LoadSharedS16: | ||
| 232 | case IR::Opcode::WriteSharedU16: | ||
| 227 | case IR::Opcode::SelectU16: | 233 | case IR::Opcode::SelectU16: |
| 228 | case IR::Opcode::BitCastU16F16: | 234 | case IR::Opcode::BitCastU16F16: |
| 229 | case IR::Opcode::BitCastF16U16: | 235 | case IR::Opcode::BitCastF16U16: |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e26047751..0276fc23b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -18,6 +18,8 @@ enum class AttributeType : u8 { | |||
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | struct Profile { | 20 | struct Profile { |
| 21 | u32 supported_spirv{0x00010000}; | ||
| 22 | |||
| 21 | bool unified_descriptor_binding{}; | 23 | bool unified_descriptor_binding{}; |
| 22 | bool support_vertex_instance_id{}; | 24 | bool support_vertex_instance_id{}; |
| 23 | bool support_float_controls{}; | 25 | bool support_float_controls{}; |
| @@ -30,6 +32,7 @@ struct Profile { | |||
| 30 | bool support_fp16_signed_zero_nan_preserve{}; | 32 | bool support_fp16_signed_zero_nan_preserve{}; |
| 31 | bool support_fp32_signed_zero_nan_preserve{}; | 33 | bool support_fp32_signed_zero_nan_preserve{}; |
| 32 | bool support_fp64_signed_zero_nan_preserve{}; | 34 | bool support_fp64_signed_zero_nan_preserve{}; |
| 35 | bool support_explicit_workgroup_layout{}; | ||
| 33 | bool support_vote{}; | 36 | bool support_vote{}; |
| 34 | bool warp_size_potentially_larger_than_guest{}; | 37 | bool warp_size_potentially_larger_than_guest{}; |
| 35 | 38 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 69dd945b2..0d6a32bfd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -114,10 +114,12 @@ public: | |||
| 114 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); | 114 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); |
| 115 | 115 | ||
| 116 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | 116 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; |
| 117 | const u32 local_memory_size{LocalMemorySize()}; | ||
| 117 | const u32 texture_bound{TextureBoundBuffer()}; | 118 | const u32 texture_bound{TextureBoundBuffer()}; |
| 118 | 119 | ||
| 119 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | 120 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) |
| 120 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | 121 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) |
| 122 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 121 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | 123 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) |
| 122 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | 124 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) |
| 123 | .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest)) | 125 | .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest)) |
| @@ -132,7 +134,10 @@ public: | |||
| 132 | file.flush(); | 134 | file.flush(); |
| 133 | if (stage == Shader::Stage::Compute) { | 135 | if (stage == Shader::Stage::Compute) { |
| 134 | const std::array<u32, 3> workgroup_size{WorkgroupSize()}; | 136 | const std::array<u32, 3> workgroup_size{WorkgroupSize()}; |
| 135 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)); | 137 | const u32 shared_memory_size{SharedMemorySize()}; |
| 138 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 139 | .write(reinterpret_cast<const char*>(&shared_memory_size), | ||
| 140 | sizeof(shared_memory_size)); | ||
| 136 | } else { | 141 | } else { |
| 137 | file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); | 142 | file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); |
| 138 | } | 143 | } |
| @@ -278,6 +283,16 @@ public: | |||
| 278 | return maxwell3d->regs.tex_cb_index; | 283 | return maxwell3d->regs.tex_cb_index; |
| 279 | } | 284 | } |
| 280 | 285 | ||
// Local memory size for a graphics stage, read from the shader program
// header. The header reports a 64-bit value; the interface is u32, so
// guard against truncation before narrowing.
u32 LocalMemorySize() const override {
    const u64 size{sph.LocalMemorySize()};
    ASSERT(size <= std::numeric_limits<u32>::max());
    return static_cast<u32>(size);
}
| 291 | |||
// Shared memory only exists for compute dispatches; querying it from a
// graphics stage is a recompiler logic error.
u32 SharedMemorySize() const override {
    throw Shader::LogicError("Requesting shared memory size in graphics stage");
}
| 295 | |||
| 281 | std::array<u32, 3> WorkgroupSize() const override { | 296 | std::array<u32, 3> WorkgroupSize() const override { |
| 282 | throw Shader::LogicError("Requesting workgroup size in a graphics stage"); | 297 | throw Shader::LogicError("Requesting workgroup size in a graphics stage"); |
| 283 | } | 298 | } |
| @@ -313,6 +328,16 @@ public: | |||
| 313 | return kepler_compute->regs.tex_cb_index; | 328 | return kepler_compute->regs.tex_cb_index; |
| 314 | } | 329 | } |
| 315 | 330 | ||
// Local memory size taken from the compute launch description (QMD)
// local_pos_alloc field — presumably bytes per thread; confirm against
// the Kepler compute QMD layout.
u32 LocalMemorySize() const override {
    const auto& qmd{kepler_compute->launch_description};
    return qmd.local_pos_alloc;
}
| 335 | |||
// Shared memory allocation taken from the compute launch description (QMD)
// shared_alloc field.
u32 SharedMemorySize() const override {
    const auto& qmd{kepler_compute->launch_description};
    return qmd.shared_alloc;
}
| 340 | |||
| 316 | std::array<u32, 3> WorkgroupSize() const override { | 341 | std::array<u32, 3> WorkgroupSize() const override { |
| 317 | const auto& qmd{kepler_compute->launch_description}; | 342 | const auto& qmd{kepler_compute->launch_description}; |
| 318 | return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | 343 | return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; |
| @@ -366,6 +391,7 @@ public: | |||
| 366 | u64 num_texture_types{}; | 391 | u64 num_texture_types{}; |
| 367 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | 392 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) |
| 368 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | 393 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) |
| 394 | .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 369 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | 395 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) |
| 370 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | 396 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) |
| 371 | .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) | 397 | .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) |
| @@ -381,7 +407,8 @@ public: | |||
| 381 | texture_types.emplace(key, type); | 407 | texture_types.emplace(key, type); |
| 382 | } | 408 | } |
| 383 | if (stage == Shader::Stage::Compute) { | 409 | if (stage == Shader::Stage::Compute) { |
| 384 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)); | 410 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) |
| 411 | .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||
| 385 | } else { | 412 | } else { |
| 386 | file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); | 413 | file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); |
| 387 | } | 414 | } |
| @@ -402,6 +429,14 @@ public: | |||
| 402 | return it->second; | 429 | return it->second; |
| 403 | } | 430 | } |
| 404 | 431 | ||
// Local memory size deserialized from the pipeline cache file.
u32 LocalMemorySize() const override {
    return local_memory_size;
}
| 435 | |||
// Shared memory size deserialized from the pipeline cache file (only
// written/read for compute shaders).
u32 SharedMemorySize() const override {
    return shared_memory_size;
}
| 439 | |||
| 405 | u32 TextureBoundBuffer() const override { | 440 | u32 TextureBoundBuffer() const override { |
| 406 | return texture_bound; | 441 | return texture_bound; |
| 407 | } | 442 | } |
| @@ -414,6 +449,8 @@ private: | |||
| 414 | std::unique_ptr<u64[]> code; | 449 | std::unique_ptr<u64[]> code; |
| 415 | std::unordered_map<u64, Shader::TextureType> texture_types; | 450 | std::unordered_map<u64, Shader::TextureType> texture_types; |
| 416 | std::array<u32, 3> workgroup_size{}; | 451 | std::array<u32, 3> workgroup_size{}; |
| 452 | u32 local_memory_size{}; | ||
| 453 | u32 shared_memory_size{}; | ||
| 417 | u32 texture_bound{}; | 454 | u32 texture_bound{}; |
| 418 | u32 read_lowest{}; | 455 | u32 read_lowest{}; |
| 419 | u32 read_highest{}; | 456 | u32 read_highest{}; |
| @@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | |||
| 541 | const auto& float_control{device.FloatControlProperties()}; | 578 | const auto& float_control{device.FloatControlProperties()}; |
| 542 | const VkDriverIdKHR driver_id{device.GetDriverID()}; | 579 | const VkDriverIdKHR driver_id{device.GetDriverID()}; |
| 543 | base_profile = Shader::Profile{ | 580 | base_profile = Shader::Profile{ |
| 581 | .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, | ||
| 544 | .unified_descriptor_binding = true, | 582 | .unified_descriptor_binding = true, |
| 545 | .support_vertex_instance_id = false, | 583 | .support_vertex_instance_id = false, |
| 546 | .support_float_controls = true, | 584 | .support_float_controls = true, |
| @@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | |||
| 558 | float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, | 596 | float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, |
| 559 | .support_fp64_signed_zero_nan_preserve = | 597 | .support_fp64_signed_zero_nan_preserve = |
| 560 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, | 598 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, |
| 599 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), | ||
| 561 | .support_vote = true, | 600 | .support_vote = true, |
| 562 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), | 601 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), |
| 563 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, | 602 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, |
| @@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { | |||
| 600 | shader = MakeShaderInfo(env, *cpu_shader_addr); | 639 | shader = MakeShaderInfo(env, *cpu_shader_addr); |
| 601 | } | 640 | } |
| 602 | const ComputePipelineCacheKey key{ | 641 | const ComputePipelineCacheKey key{ |
| 603 | .unique_hash = shader->unique_hash, | 642 | .unique_hash{shader->unique_hash}, |
| 604 | .shared_memory_size = qmd.shared_alloc, | 643 | .shared_memory_size{qmd.shared_alloc}, |
| 605 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, | 644 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, |
| 606 | }; | 645 | }; |
| 607 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; | 646 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 009b74f12..c027598ba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 399 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); | 399 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); |
| 400 | } | 400 | } |
| 401 | 401 | ||
| 402 | VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; | ||
| 403 | if (khr_workgroup_memory_explicit_layout) { | ||
| 404 | workgroup_layout = { | ||
| 405 | .sType = | ||
| 406 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, | ||
| 407 | .pNext = nullptr, | ||
| 408 | .workgroupMemoryExplicitLayout = VK_TRUE, | ||
| 409 | .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, | ||
| 410 | .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, | ||
| 411 | .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, | ||
| 412 | }; | ||
| 413 | SetNext(next, workgroup_layout); | ||
| 414 | } | ||
| 415 | |||
| 402 | if (!ext_depth_range_unrestricted) { | 416 | if (!ext_depth_range_unrestricted) { |
| 403 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 417 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 404 | } | 418 | } |
| @@ -662,6 +676,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 662 | } | 676 | } |
| 663 | 677 | ||
| 664 | bool has_khr_shader_float16_int8{}; | 678 | bool has_khr_shader_float16_int8{}; |
| 679 | bool has_khr_workgroup_memory_explicit_layout{}; | ||
| 665 | bool has_ext_subgroup_size_control{}; | 680 | bool has_ext_subgroup_size_control{}; |
| 666 | bool has_ext_transform_feedback{}; | 681 | bool has_ext_transform_feedback{}; |
| 667 | bool has_ext_custom_border_color{}; | 682 | bool has_ext_custom_border_color{}; |
| @@ -682,6 +697,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 682 | test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); | 697 | test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); |
| 683 | test(khr_uniform_buffer_standard_layout, | 698 | test(khr_uniform_buffer_standard_layout, |
| 684 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 699 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 700 | test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); | ||
| 685 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 701 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); |
| 686 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 702 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 687 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 703 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| @@ -694,6 +710,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 694 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | 710 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); |
| 695 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | 711 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); |
| 696 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | 712 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); |
| 713 | test(has_khr_workgroup_memory_explicit_layout, | ||
| 714 | VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); | ||
| 697 | if (Settings::values.renderer_debug) { | 715 | if (Settings::values.renderer_debug) { |
| 698 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, | 716 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, |
| 699 | true); | 717 | true); |
| @@ -787,6 +805,22 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 787 | ext_extended_dynamic_state = true; | 805 | ext_extended_dynamic_state = true; |
| 788 | } | 806 | } |
| 789 | } | 807 | } |
| 808 | if (has_khr_workgroup_memory_explicit_layout) { | ||
| 809 | VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; | ||
| 810 | layout.sType = | ||
| 811 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; | ||
| 812 | layout.pNext = nullptr; | ||
| 813 | features.pNext = &layout; | ||
| 814 | physical.GetFeatures2KHR(features); | ||
| 815 | |||
| 816 | if (layout.workgroupMemoryExplicitLayout && | ||
| 817 | layout.workgroupMemoryExplicitLayout8BitAccess && | ||
| 818 | layout.workgroupMemoryExplicitLayout16BitAccess && | ||
| 819 | layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { | ||
| 820 | extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); | ||
| 821 | khr_workgroup_memory_explicit_layout = true; | ||
| 822 | } | ||
| 823 | } | ||
| 790 | return extensions; | 824 | return extensions; |
| 791 | } | 825 | } |
| 792 | 826 | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c268a4f8d..ac2311e7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -168,11 +168,21 @@ public: | |||
| 168 | return nv_viewport_swizzle; | 168 | return nv_viewport_swizzle; |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | 171 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. |
| 172 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 172 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 173 | return khr_uniform_buffer_standard_layout; | 173 | return khr_uniform_buffer_standard_layout; |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | /// Returns true if the device supports VK_KHR_spirv_1_4. | ||
| 177 | bool IsKhrSpirv1_4Supported() const { | ||
| 178 | return khr_spirv_1_4; | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. | ||
| 182 | bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { | ||
| 183 | return khr_workgroup_memory_explicit_layout; | ||
| 184 | } | ||
| 185 | |||
| 176 | /// Returns true if the device supports VK_EXT_index_type_uint8. | 186 | /// Returns true if the device supports VK_EXT_index_type_uint8. |
| 177 | bool IsExtIndexTypeUint8Supported() const { | 187 | bool IsExtIndexTypeUint8Supported() const { |
| 178 | return ext_index_type_uint8; | 188 | return ext_index_type_uint8; |
| @@ -296,20 +306,22 @@ private: | |||
| 296 | bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. | 306 | bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. |
| 297 | bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. | 307 | bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. |
| 298 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 308 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 299 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 309 | bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. |
| 300 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 310 | bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. |
| 301 | bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. | 311 | bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. |
| 302 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 312 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 303 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 313 | bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. |
| 304 | bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. | 314 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 305 | bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. | 315 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 306 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | 316 | bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. |
| 307 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | 317 | bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. |
| 308 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. | 318 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. |
| 309 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | 319 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. |
| 310 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 320 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. |
| 311 | bool has_renderdoc{}; ///< Has RenderDoc attached | 321 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. |
| 312 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | 322 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 323 | bool has_renderdoc{}; ///< Has RenderDoc attached | ||
| 324 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | ||
| 313 | 325 | ||
| 314 | // Telemetry parameters | 326 | // Telemetry parameters |
| 315 | std::string vendor_name; ///< Device's driver name. | 327 | std::string vendor_name; ///< Device's driver name. |