diff options
Diffstat (limited to '')
17 files changed, 626 insertions, 17 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 55b846c84..003cbefb1 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC | |||
| 14 | backend/spirv/emit_spirv_logical.cpp | 14 | backend/spirv/emit_spirv_logical.cpp |
| 15 | backend/spirv/emit_spirv_memory.cpp | 15 | backend/spirv/emit_spirv_memory.cpp |
| 16 | backend/spirv/emit_spirv_select.cpp | 16 | backend/spirv/emit_spirv_select.cpp |
| 17 | backend/spirv/emit_spirv_shared_memory.cpp | ||
| 17 | backend/spirv/emit_spirv_special.cpp | 18 | backend/spirv/emit_spirv_special.cpp |
| 18 | backend/spirv/emit_spirv_undefined.cpp | 19 | backend/spirv/emit_spirv_undefined.cpp |
| 19 | backend/spirv/emit_spirv_warp.cpp | 20 | backend/spirv/emit_spirv_warp.cpp |
| @@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC | |||
| 111 | frontend/maxwell/translate/impl/load_constant.cpp | 112 | frontend/maxwell/translate/impl/load_constant.cpp |
| 112 | frontend/maxwell/translate/impl/load_effective_address.cpp | 113 | frontend/maxwell/translate/impl/load_effective_address.cpp |
| 113 | frontend/maxwell/translate/impl/load_store_attribute.cpp | 114 | frontend/maxwell/translate/impl/load_store_attribute.cpp |
| 115 | frontend/maxwell/translate/impl/load_store_local_shared.cpp | ||
| 114 | frontend/maxwell/translate/impl/load_store_memory.cpp | 116 | frontend/maxwell/translate/impl/load_store_memory.cpp |
| 115 | frontend/maxwell/translate/impl/logic_operation.cpp | 117 | frontend/maxwell/translate/impl/logic_operation.cpp |
| 116 | frontend/maxwell/translate/impl/logic_operation_three_input.cpp | 118 | frontend/maxwell/translate/impl/logic_operation_three_input.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a8ca33c1d..96d0e9b4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/div_ceil.h" | ||
| 12 | #include "shader_recompiler/backend/spirv/emit_context.h" | 13 | #include "shader_recompiler/backend/spirv/emit_context.h" |
| 13 | 14 | ||
| 14 | namespace Shader::Backend::SPIRV { | 15 | namespace Shader::Backend::SPIRV { |
| @@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie | |||
| 96 | } | 97 | } |
| 97 | 98 | ||
| 98 | EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) | 99 | EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) |
| 99 | : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} { | 100 | : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { |
| 100 | AddCapability(spv::Capability::Shader); | 101 | AddCapability(spv::Capability::Shader); |
| 101 | DefineCommonTypes(program.info); | 102 | DefineCommonTypes(program.info); |
| 102 | DefineCommonConstants(); | 103 | DefineCommonConstants(); |
| 103 | DefineInterfaces(program.info); | 104 | DefineInterfaces(program.info); |
| 105 | DefineLocalMemory(program); | ||
| 106 | DefineSharedMemory(program); | ||
| 104 | DefineConstantBuffers(program.info, binding); | 107 | DefineConstantBuffers(program.info, binding); |
| 105 | DefineStorageBuffers(program.info, binding); | 108 | DefineStorageBuffers(program.info, binding); |
| 106 | DefineTextures(program.info, binding); | 109 | DefineTextures(program.info, binding); |
| @@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) { | |||
| 143 | F32.Define(*this, TypeFloat(32), "f32"); | 146 | F32.Define(*this, TypeFloat(32), "f32"); |
| 144 | U32.Define(*this, TypeInt(32, false), "u32"); | 147 | U32.Define(*this, TypeInt(32, false), "u32"); |
| 145 | 148 | ||
| 149 | private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); | ||
| 150 | |||
| 146 | input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); | 151 | input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); |
| 147 | input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); | 152 | input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); |
| 148 | input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); | 153 | input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); |
| @@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) { | |||
| 184 | DefineOutputs(info); | 189 | DefineOutputs(info); |
| 185 | } | 190 | } |
| 186 | 191 | ||
| 192 | void EmitContext::DefineLocalMemory(const IR::Program& program) { | ||
| 193 | if (program.local_memory_size == 0) { | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; | ||
| 197 | const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; | ||
| 198 | const Id pointer{TypePointer(spv::StorageClass::Private, type)}; | ||
| 199 | local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); | ||
| 200 | if (profile.supported_spirv >= 0x00010400) { | ||
| 201 | interfaces.push_back(local_memory); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||
| 206 | if (program.shared_memory_size == 0) { | ||
| 207 | return; | ||
| 208 | } | ||
| 209 | const auto make{[&](Id element_type, u32 element_size) { | ||
| 210 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; | ||
| 211 | const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; | ||
| 212 | Decorate(array_type, spv::Decoration::ArrayStride, element_size); | ||
| 213 | |||
| 214 | const Id struct_type{TypeStruct(array_type)}; | ||
| 215 | MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); | ||
| 216 | Decorate(struct_type, spv::Decoration::Block); | ||
| 217 | |||
| 218 | const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; | ||
| 219 | const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; | ||
| 220 | const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; | ||
| 221 | Decorate(variable, spv::Decoration::Aliased); | ||
| 222 | interfaces.push_back(variable); | ||
| 223 | |||
| 224 | return std::make_pair(variable, element_pointer); | ||
| 225 | }}; | ||
| 226 | if (profile.support_explicit_workgroup_layout) { | ||
| 227 | AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); | ||
| 228 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); | ||
| 229 | if (program.info.uses_int8) { | ||
| 230 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); | ||
| 231 | std::tie(shared_memory_u8, shared_u8) = make(U8, 1); | ||
| 232 | } | ||
| 233 | if (program.info.uses_int16) { | ||
| 234 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); | ||
| 235 | std::tie(shared_memory_u16, shared_u16) = make(U16, 2); | ||
| 236 | } | ||
| 237 | std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); | ||
| 238 | std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); | ||
| 239 | std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); | ||
| 240 | } | ||
| 241 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; | ||
| 242 | const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; | ||
| 243 | const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; | ||
| 244 | shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); | ||
| 245 | shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); | ||
| 246 | interfaces.push_back(shared_memory_u32); | ||
| 247 | |||
| 248 | const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; | ||
| 249 | const auto make_function{[&](u32 mask, u32 size) { | ||
| 250 | const Id loop_header{OpLabel()}; | ||
| 251 | const Id continue_block{OpLabel()}; | ||
| 252 | const Id merge_block{OpLabel()}; | ||
| 253 | |||
| 254 | const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 255 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 256 | const Id insert_value{OpFunctionParameter(U32[1])}; | ||
| 257 | AddLabel(); | ||
| 258 | OpBranch(loop_header); | ||
| 259 | |||
| 260 | AddLabel(loop_header); | ||
| 261 | const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; | ||
| 262 | const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; | ||
| 263 | const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; | ||
| 264 | const Id count{Constant(U32[1], size)}; | ||
| 265 | OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 266 | OpBranch(continue_block); | ||
| 267 | |||
| 268 | AddLabel(continue_block); | ||
| 269 | const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; | ||
| 270 | const Id old_value{OpLoad(U32[1], word_pointer)}; | ||
| 271 | const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; | ||
| 272 | const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), | ||
| 273 | u32_zero_value, u32_zero_value, new_value, | ||
| 274 | old_value)}; | ||
| 275 | const Id success{OpIEqual(U1, atomic_res, old_value)}; | ||
| 276 | OpBranchConditional(success, merge_block, loop_header); | ||
| 277 | |||
| 278 | AddLabel(merge_block); | ||
| 279 | OpReturn(); | ||
| 280 | OpFunctionEnd(); | ||
| 281 | return func; | ||
| 282 | }}; | ||
| 283 | if (program.info.uses_int8) { | ||
| 284 | shared_store_u8_func = make_function(24, 8); | ||
| 285 | } | ||
| 286 | if (program.info.uses_int16) { | ||
| 287 | shared_store_u16_func = make_function(16, 16); | ||
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 187 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | 291 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { |
| 188 | if (info.constant_buffer_descriptors.empty()) { | 292 | if (info.constant_buffer_descriptors.empty()) { |
| 189 | return; | 293 | return; |
| @@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | |||
| 234 | Decorate(id, spv::Decoration::Binding, binding); | 338 | Decorate(id, spv::Decoration::Binding, binding); |
| 235 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | 339 | Decorate(id, spv::Decoration::DescriptorSet, 0U); |
| 236 | Name(id, fmt::format("ssbo{}", index)); | 340 | Name(id, fmt::format("ssbo{}", index)); |
| 341 | if (profile.supported_spirv >= 0x00010400) { | ||
| 342 | interfaces.push_back(id); | ||
| 343 | } | ||
| 237 | std::fill_n(ssbos.data() + index, desc.count, id); | 344 | std::fill_n(ssbos.data() + index, desc.count, id); |
| 238 | index += desc.count; | 345 | index += desc.count; |
| 239 | binding += desc.count; | 346 | binding += desc.count; |
| @@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { | |||
| 261 | .image_type{image_type}, | 368 | .image_type{image_type}, |
| 262 | }); | 369 | }); |
| 263 | } | 370 | } |
| 371 | if (profile.supported_spirv >= 0x00010400) { | ||
| 372 | interfaces.push_back(id); | ||
| 373 | } | ||
| 264 | binding += desc.count; | 374 | binding += desc.count; |
| 265 | } | 375 | } |
| 266 | } | 376 | } |
| @@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: | |||
| 363 | for (size_t i = 0; i < desc.count; ++i) { | 473 | for (size_t i = 0; i < desc.count; ++i) { |
| 364 | cbufs[desc.index + i].*member_type = id; | 474 | cbufs[desc.index + i].*member_type = id; |
| 365 | } | 475 | } |
| 476 | if (profile.supported_spirv >= 0x00010400) { | ||
| 477 | interfaces.push_back(id); | ||
| 478 | } | ||
| 366 | binding += desc.count; | 479 | binding += desc.count; |
| 367 | } | 480 | } |
| 368 | } | 481 | } |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 01b7b665d..1a4e8221a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -73,6 +73,14 @@ public: | |||
| 73 | 73 | ||
| 74 | UniformDefinitions uniform_types; | 74 | UniformDefinitions uniform_types; |
| 75 | 75 | ||
| 76 | Id private_u32{}; | ||
| 77 | |||
| 78 | Id shared_u8{}; | ||
| 79 | Id shared_u16{}; | ||
| 80 | Id shared_u32{}; | ||
| 81 | Id shared_u32x2{}; | ||
| 82 | Id shared_u32x4{}; | ||
| 83 | |||
| 76 | Id input_f32{}; | 84 | Id input_f32{}; |
| 77 | Id input_u32{}; | 85 | Id input_u32{}; |
| 78 | Id input_s32{}; | 86 | Id input_s32{}; |
| @@ -96,6 +104,17 @@ public: | |||
| 96 | Id base_vertex{}; | 104 | Id base_vertex{}; |
| 97 | Id front_face{}; | 105 | Id front_face{}; |
| 98 | 106 | ||
| 107 | Id local_memory{}; | ||
| 108 | |||
| 109 | Id shared_memory_u8{}; | ||
| 110 | Id shared_memory_u16{}; | ||
| 111 | Id shared_memory_u32{}; | ||
| 112 | Id shared_memory_u32x2{}; | ||
| 113 | Id shared_memory_u32x4{}; | ||
| 114 | |||
| 115 | Id shared_store_u8_func{}; | ||
| 116 | Id shared_store_u16_func{}; | ||
| 117 | |||
| 99 | Id input_position{}; | 118 | Id input_position{}; |
| 100 | std::array<Id, 32> input_generics{}; | 119 | std::array<Id, 32> input_generics{}; |
| 101 | 120 | ||
| @@ -111,6 +130,8 @@ private: | |||
| 111 | void DefineCommonTypes(const Info& info); | 130 | void DefineCommonTypes(const Info& info); |
| 112 | void DefineCommonConstants(); | 131 | void DefineCommonConstants(); |
| 113 | void DefineInterfaces(const Info& info); | 132 | void DefineInterfaces(const Info& info); |
| 133 | void DefineLocalMemory(const IR::Program& program); | ||
| 134 | void DefineSharedMemory(const IR::Program& program); | ||
| 114 | void DefineConstantBuffers(const Info& info, u32& binding); | 135 | void DefineConstantBuffers(const Info& info, u32& binding); |
| 115 | void DefineStorageBuffers(const Info& info, u32& binding); | 136 | void DefineStorageBuffers(const Info& info, u32& binding); |
| 116 | void DefineTextures(const Info& info, u32& binding); | 137 | void DefineTextures(const Info& info, u32& binding); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 837f0e858..4f62af959 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx); | |||
| 58 | void EmitSetOFlag(EmitContext& ctx); | 58 | void EmitSetOFlag(EmitContext& ctx); |
| 59 | Id EmitWorkgroupId(EmitContext& ctx); | 59 | Id EmitWorkgroupId(EmitContext& ctx); |
| 60 | Id EmitLocalInvocationId(EmitContext& ctx); | 60 | Id EmitLocalInvocationId(EmitContext& ctx); |
| 61 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset); | ||
| 62 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); | ||
| 61 | Id EmitUndefU1(EmitContext& ctx); | 63 | Id EmitUndefU1(EmitContext& ctx); |
| 62 | Id EmitUndefU8(EmitContext& ctx); | 64 | Id EmitUndefU8(EmitContext& ctx); |
| 63 | Id EmitUndefU16(EmitContext& ctx); | 65 | Id EmitUndefU16(EmitContext& ctx); |
| @@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va | |||
| 94 | Id value); | 96 | Id value); |
| 95 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | 97 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, |
| 96 | Id value); | 98 | Id value); |
| 99 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset); | ||
| 100 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset); | ||
| 101 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset); | ||
| 102 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset); | ||
| 103 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset); | ||
| 104 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset); | ||
| 105 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset); | ||
| 106 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); | ||
| 107 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); | ||
| 108 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); | ||
| 109 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); | ||
| 110 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); | ||
| 97 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | 111 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); |
| 98 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | 112 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); |
| 99 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | 113 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4cbc2aec1..52dcef8a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) { | |||
| 238 | return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); | 238 | return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { | ||
| 242 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 243 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 244 | } | ||
| 245 | |||
| 246 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { | ||
| 247 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 248 | ctx.OpStore(pointer, value); | ||
| 249 | } | ||
| 250 | |||
| 241 | } // namespace Shader::Backend::SPIRV | 251 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..fa2fc9ab4 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp | |||
| @@ -0,0 +1,175 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { | ||
| 10 | const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; | ||
| 11 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 12 | return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); | ||
| 13 | } | ||
| 14 | |||
| 15 | Id Word(EmitContext& ctx, Id offset) { | ||
| 16 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 17 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 18 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 19 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 20 | } | ||
| 21 | |||
| 22 | std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { | ||
| 23 | const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))}; | ||
| 24 | const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))}; | ||
| 25 | const Id count_id{ctx.Constant(ctx.U32[1], count)}; | ||
| 26 | return {bit, count_id}; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { | ||
| 31 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 32 | const Id pointer{ | ||
| 33 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 34 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 35 | } else { | ||
| 36 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 37 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { | ||
| 42 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 43 | const Id pointer{ | ||
| 44 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 45 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 46 | } else { | ||
| 47 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 48 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { | ||
| 53 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 54 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 55 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 56 | } else { | ||
| 57 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 58 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { | ||
| 63 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 64 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 65 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 66 | } else { | ||
| 67 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 68 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { | ||
| 73 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 74 | const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; | ||
| 75 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 76 | } else { | ||
| 77 | return Word(ctx, offset); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { | ||
| 82 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 83 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 84 | return ctx.OpLoad(ctx.U32[2], pointer); | ||
| 85 | } else { | ||
| 86 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 87 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 88 | const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))}; | ||
| 89 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; | ||
| 90 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; | ||
| 91 | return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), | ||
| 92 | ctx.OpLoad(ctx.U32[1], rhs_pointer)); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { | ||
| 97 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 98 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 99 | return ctx.OpLoad(ctx.U32[4], pointer); | ||
| 100 | } | ||
| 101 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 102 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 103 | std::array<Id, 4> values{}; | ||
| 104 | for (u32 i = 0; i < 4; ++i) { | ||
| 105 | const Id index{i == 0 ? base_index | ||
| 106 | : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; | ||
| 107 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 108 | values[i] = ctx.OpLoad(ctx.U32[1], pointer); | ||
| 109 | } | ||
| 110 | return ctx.OpCompositeConstruct(ctx.U32[4], values); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { | ||
| 114 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 115 | const Id pointer{ | ||
| 116 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 117 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); | ||
| 118 | } else { | ||
| 119 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { | ||
| 124 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 125 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 126 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); | ||
| 127 | } else { | ||
| 128 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { | ||
| 133 | Id pointer{}; | ||
| 134 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 135 | pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); | ||
| 136 | } else { | ||
| 137 | const Id shift{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 138 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 139 | pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); | ||
| 140 | } | ||
| 141 | ctx.OpStore(pointer, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { | ||
| 145 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 146 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 147 | ctx.OpStore(pointer, value); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | const Id shift{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 151 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 152 | const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; | ||
| 153 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; | ||
| 154 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; | ||
| 155 | ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); | ||
| 156 | ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { | ||
| 160 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 161 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 162 | ctx.OpStore(pointer, value); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | const Id shift{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 166 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 167 | for (u32 i = 0; i < 4; ++i) { | ||
| 168 | const Id index{i == 0 ? base_index | ||
| 169 | : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; | ||
| 170 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 171 | ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0c62c1c54..9415d02f6 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h | |||
| @@ -19,6 +19,10 @@ public: | |||
| 19 | 19 | ||
| 20 | [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; | 20 | [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; |
| 21 | 21 | ||
| 22 | [[nodiscard]] virtual u32 LocalMemorySize() const = 0; | ||
| 23 | |||
| 24 | [[nodiscard]] virtual u32 SharedMemorySize() const = 0; | ||
| 25 | |||
| 22 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | 26 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; |
| 23 | 27 | ||
| 24 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { | 28 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6d41442ee..d6a1d8ec2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { | |||
| 355 | Inst(Opcode::WriteGlobal128, address, vector); | 355 | Inst(Opcode::WriteGlobal128, address, vector); |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | U32 IREmitter::LoadLocal(const IR::U32& word_offset) { | ||
| 359 | return Inst<U32>(Opcode::LoadLocal, word_offset); | ||
| 360 | } | ||
| 361 | |||
| 362 | void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { | ||
| 363 | Inst(Opcode::WriteLocal, word_offset, value); | ||
| 364 | } | ||
| 365 | |||
| 366 | Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { | ||
| 367 | switch (bit_size) { | ||
| 368 | case 8: | ||
| 369 | return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); | ||
| 370 | case 16: | ||
| 371 | return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); | ||
| 372 | case 32: | ||
| 373 | return Inst(Opcode::LoadSharedU32, offset); | ||
| 374 | case 64: | ||
| 375 | return Inst(Opcode::LoadSharedU64, offset); | ||
| 376 | case 128: | ||
| 377 | return Inst(Opcode::LoadSharedU128, offset); | ||
| 378 | } | ||
| 379 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 380 | } | ||
| 381 | |||
| 382 | void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { | ||
| 383 | switch (bit_size) { | ||
| 384 | case 8: | ||
| 385 | Inst(Opcode::WriteSharedU8, offset, value); | ||
| 386 | break; | ||
| 387 | case 16: | ||
| 388 | Inst(Opcode::WriteSharedU16, offset, value); | ||
| 389 | break; | ||
| 390 | case 32: | ||
| 391 | Inst(Opcode::WriteSharedU32, offset, value); | ||
| 392 | break; | ||
| 393 | case 64: | ||
| 394 | Inst(Opcode::WriteSharedU64, offset, value); | ||
| 395 | break; | ||
| 396 | case 128: | ||
| 397 | Inst(Opcode::WriteSharedU128, offset, value); | ||
| 398 | break; | ||
| 399 | default: | ||
| 400 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 358 | U1 IREmitter::GetZeroFromOp(const Value& op) { | 404 | U1 IREmitter::GetZeroFromOp(const Value& op) { |
| 359 | return Inst<U1>(Opcode::GetZeroFromOp, op); | 405 | return Inst<U1>(Opcode::GetZeroFromOp, op); |
| 360 | } | 406 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8d50aa607..842c2bdaf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -99,6 +99,12 @@ public: | |||
| 99 | void WriteGlobal64(const U64& address, const IR::Value& vector); | 99 | void WriteGlobal64(const U64& address, const IR::Value& vector); |
| 100 | void WriteGlobal128(const U64& address, const IR::Value& vector); | 100 | void WriteGlobal128(const U64& address, const IR::Value& vector); |
| 101 | 101 | ||
| 102 | [[nodiscard]] U32 LoadLocal(const U32& word_offset); | ||
| 103 | void WriteLocal(const U32& word_offset, const U32& value); | ||
| 104 | |||
| 105 | [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); | ||
| 106 | void WriteShared(int bit_size, const U32& offset, const Value& value); | ||
| 107 | |||
| 102 | [[nodiscard]] U1 GetZeroFromOp(const Value& op); | 108 | [[nodiscard]] U1 GetZeroFromOp(const Value& op); |
| 103 | [[nodiscard]] U1 GetSignFromOp(const Value& op); | 109 | [[nodiscard]] U1 GetSignFromOp(const Value& op); |
| 104 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | 110 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index be8eb4d4c..52a5e5034 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept { | |||
| 76 | case Opcode::WriteStorage32: | 76 | case Opcode::WriteStorage32: |
| 77 | case Opcode::WriteStorage64: | 77 | case Opcode::WriteStorage64: |
| 78 | case Opcode::WriteStorage128: | 78 | case Opcode::WriteStorage128: |
| 79 | case Opcode::WriteLocal: | ||
| 80 | case Opcode::WriteSharedU8: | ||
| 81 | case Opcode::WriteSharedU16: | ||
| 82 | case Opcode::WriteSharedU32: | ||
| 83 | case Opcode::WriteSharedU64: | ||
| 84 | case Opcode::WriteSharedU128: | ||
| 79 | return true; | 85 | return true; |
| 80 | default: | 86 | default: |
| 81 | return false; | 87 | return false; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5d7462d76..c75658328 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32, | |||
| 89 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) | 89 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) |
| 90 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) | 90 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) |
| 91 | 91 | ||
| 92 | // Local memory operations | ||
| 93 | OPCODE(LoadLocal, U32, U32, ) | ||
| 94 | OPCODE(WriteLocal, Void, U32, U32, ) | ||
| 95 | |||
| 96 | // Shared memory operations | ||
| 97 | OPCODE(LoadSharedU8, U32, U32, ) | ||
| 98 | OPCODE(LoadSharedS8, U32, U32, ) | ||
| 99 | OPCODE(LoadSharedU16, U32, U32, ) | ||
| 100 | OPCODE(LoadSharedS16, U32, U32, ) | ||
| 101 | OPCODE(LoadSharedU32, U32, U32, ) | ||
| 102 | OPCODE(LoadSharedU64, U32x2, U32, ) | ||
| 103 | OPCODE(LoadSharedU128, U32x4, U32, ) | ||
| 104 | OPCODE(WriteSharedU8, Void, U32, U32, ) | ||
| 105 | OPCODE(WriteSharedU16, Void, U32, U32, ) | ||
| 106 | OPCODE(WriteSharedU32, Void, U32, U32, ) | ||
| 107 | OPCODE(WriteSharedU64, Void, U32, U32x2, ) | ||
| 108 | OPCODE(WriteSharedU128, Void, U32, U32x4, ) | ||
| 109 | |||
| 92 | // Vector utility | 110 | // Vector utility |
| 93 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) | 111 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) |
| 94 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) | 112 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) |
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 0162e919c..3a37b3ab9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h | |||
| @@ -21,6 +21,8 @@ struct Program { | |||
| 21 | Info info; | 21 | Info info; |
| 22 | Stage stage{}; | 22 | Stage stage{}; |
| 23 | std::array<u32, 3> workgroup_size{}; | 23 | std::array<u32, 3> workgroup_size{}; |
| 24 | u32 local_memory_size{}; | ||
| 25 | u32 shared_memory_size{}; | ||
| 24 | }; | 26 | }; |
| 25 | 27 | ||
| 26 | [[nodiscard]] std::string DumpProgram(const Program& program); | 28 | [[nodiscard]] std::string DumpProgram(const Program& program); |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 67 | program.blocks = VisitAST(inst_pool, block_pool, env, cfg); | 67 | program.blocks = VisitAST(inst_pool, block_pool, env, cfg); |
| 68 | program.post_order_blocks = PostOrder(program.blocks); | 68 | program.post_order_blocks = PostOrder(program.blocks); |
| 69 | program.stage = env.ShaderStage(); | 69 | program.stage = env.ShaderStage(); |
| 70 | program.local_memory_size = env.LocalMemorySize(); | ||
| 70 | if (program.stage == Stage::Compute) { | 71 | if (program.stage == Stage::Compute) { |
| 71 | program.workgroup_size = env.WorkgroupSize(); | 72 | program.workgroup_size = env.WorkgroupSize(); |
| 73 | program.shared_memory_size = env.SharedMemorySize(); | ||
| 72 | } | 74 | } |
| 73 | RemoveUnreachableBlocks(program); | 75 | RemoveUnreachableBlocks(program); |
| 74 | 76 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..68963c8ea --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | |||
| @@ -0,0 +1,197 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Size : u64 { | ||
| 12 | U8, | ||
| 13 | S8, | ||
| 14 | U16, | ||
| 15 | S16, | ||
| 16 | B32, | ||
| 17 | B64, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | IR::U32 Offset(TranslatorVisitor& v, u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 25 | BitField<20, 24, u64> absolute_offset; | ||
| 26 | BitField<20, 24, s64> relative_offset; | ||
| 27 | } const encoding{insn}; | ||
| 28 | |||
| 29 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 30 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); | ||
| 31 | } else { | ||
| 32 | const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; | ||
| 33 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::pair<int, bool> GetSize(u64 insn) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<48, 3, Size> size; | ||
| 41 | } const encoding{insn}; | ||
| 42 | |||
| 43 | const Size nnn = encoding.size; | ||
| 44 | switch (encoding.size) { | ||
| 45 | case Size::U8: | ||
| 46 | return {8, false}; | ||
| 47 | case Size::S8: | ||
| 48 | return {8, true}; | ||
| 49 | case Size::U16: | ||
| 50 | return {16, false}; | ||
| 51 | case Size::S16: | ||
| 52 | return {16, true}; | ||
| 53 | case Size::B32: | ||
| 54 | return {32, false}; | ||
| 55 | case Size::B64: | ||
| 56 | return {64, false}; | ||
| 57 | case Size::B128: | ||
| 58 | return {128, false}; | ||
| 59 | default: | ||
| 60 | throw NotImplementedException("Invalid size {}", encoding.size.Value()); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | IR::Reg Reg(u64 insn) { | ||
| 65 | union { | ||
| 66 | u64 raw; | ||
| 67 | BitField<0, 8, IR::Reg> reg; | ||
| 68 | } const encoding{insn}; | ||
| 69 | |||
| 70 | return encoding.reg; | ||
| 71 | } | ||
| 72 | |||
| 73 | IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 74 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); | ||
| 75 | } | ||
| 76 | |||
| 77 | IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 78 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); | ||
| 79 | } | ||
| 80 | } // Anonymous namespace | ||
| 81 | |||
| 82 | void TranslatorVisitor::LDL(u64 insn) { | ||
| 83 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 84 | const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; | ||
| 85 | |||
| 86 | const IR::Reg dest{Reg(insn)}; | ||
| 87 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 88 | switch (bit_size) { | ||
| 89 | case 8: { | ||
| 90 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 91 | X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | case 16: { | ||
| 95 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 96 | X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | case 32: | ||
| 100 | case 64: | ||
| 101 | case 128: | ||
| 102 | if (!IR::IsAligned(dest, bit_size / 32)) { | ||
| 103 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 104 | } | ||
| 105 | X(dest, ir.LoadLocal(word_offset)); | ||
| 106 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 107 | X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); | ||
| 108 | } | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void TranslatorVisitor::LDS(u64 insn) { | ||
| 114 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 115 | const IR::Reg dest{Reg(insn)}; | ||
| 116 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 117 | const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; | ||
| 118 | switch (bit_size) { | ||
| 119 | case 8: | ||
| 120 | case 16: | ||
| 121 | case 32: | ||
| 122 | X(dest, IR::U32{value}); | ||
| 123 | break; | ||
| 124 | case 64: | ||
| 125 | case 128: | ||
| 126 | if (!IR::IsAligned(dest, bit_size / 32)) { | ||
| 127 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 128 | } | ||
| 129 | for (int element = 0; element < bit_size / 32; ++element) { | ||
| 130 | X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); | ||
| 131 | } | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | void TranslatorVisitor::STL(u64 insn) { | ||
| 137 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 138 | const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; | ||
| 139 | |||
| 140 | const IR::Reg reg{Reg(insn)}; | ||
| 141 | const IR::U32 src{X(reg)}; | ||
| 142 | const int bit_size{GetSize(insn).first}; | ||
| 143 | switch (bit_size) { | ||
| 144 | case 8: { | ||
| 145 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 146 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; | ||
| 147 | ir.WriteLocal(word_offset, value); | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | case 16: { | ||
| 151 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 152 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; | ||
| 153 | ir.WriteLocal(word_offset, value); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case 32: | ||
| 157 | case 64: | ||
| 158 | case 128: | ||
| 159 | if (!IR::IsAligned(reg, bit_size / 32)) { | ||
| 160 | throw NotImplementedException("Unaligned source register"); | ||
| 161 | } | ||
| 162 | ir.WriteLocal(word_offset, src); | ||
| 163 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 164 | ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); | ||
| 165 | } | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | void TranslatorVisitor::STS(u64 insn) { | ||
| 171 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 172 | const IR::Reg reg{Reg(insn)}; | ||
| 173 | const int bit_size{GetSize(insn).first}; | ||
| 174 | switch (bit_size) { | ||
| 175 | case 8: | ||
| 176 | case 16: | ||
| 177 | case 32: | ||
| 178 | ir.WriteShared(bit_size, offset, X(reg)); | ||
| 179 | break; | ||
| 180 | case 64: | ||
| 181 | if (!IR::IsAligned(reg, 2)) { | ||
| 182 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 183 | } | ||
| 184 | ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); | ||
| 185 | break; | ||
| 186 | case 128: { | ||
| 187 | if (!IR::IsAligned(reg, 2)) { | ||
| 188 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 189 | } | ||
| 190 | const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; | ||
| 191 | ir.WriteShared(128, offset, vector); | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { | |||
| 193 | ThrowNotImplemented(Opcode::LD); | 193 | ThrowNotImplemented(Opcode::LD); |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | void TranslatorVisitor::LDL(u64) { | ||
| 197 | ThrowNotImplemented(Opcode::LDL); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::LDS(u64) { | ||
| 201 | ThrowNotImplemented(Opcode::LDS); | ||
| 202 | } | ||
| 203 | |||
| 204 | void TranslatorVisitor::LEPC(u64) { | 196 | void TranslatorVisitor::LEPC(u64) { |
| 205 | ThrowNotImplemented(Opcode::LEPC); | 197 | ThrowNotImplemented(Opcode::LEPC); |
| 206 | } | 198 | } |
| @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { | |||
| 309 | ThrowNotImplemented(Opcode::ST); | 301 | ThrowNotImplemented(Opcode::ST); |
| 310 | } | 302 | } |
| 311 | 303 | ||
| 312 | void TranslatorVisitor::STL(u64) { | ||
| 313 | ThrowNotImplemented(Opcode::STL); | ||
| 314 | } | ||
| 315 | |||
| 316 | void TranslatorVisitor::STP(u64) { | 304 | void TranslatorVisitor::STP(u64) { |
| 317 | ThrowNotImplemented(Opcode::STP); | 305 | ThrowNotImplemented(Opcode::STP); |
| 318 | } | 306 | } |
| 319 | 307 | ||
| 320 | void TranslatorVisitor::STS(u64) { | ||
| 321 | ThrowNotImplemented(Opcode::STS); | ||
| 322 | } | ||
| 323 | |||
| 324 | void TranslatorVisitor::SUATOM_cas(u64) { | 308 | void TranslatorVisitor::SUATOM_cas(u64) { |
| 325 | ThrowNotImplemented(Opcode::SUATOM_cas); | 309 | ThrowNotImplemented(Opcode::SUATOM_cas); |
| 326 | } | 310 | } |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60be67228..c932c307b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 200 | case IR::Opcode::LoadStorageS8: | 200 | case IR::Opcode::LoadStorageS8: |
| 201 | case IR::Opcode::WriteStorageU8: | 201 | case IR::Opcode::WriteStorageU8: |
| 202 | case IR::Opcode::WriteStorageS8: | 202 | case IR::Opcode::WriteStorageS8: |
| 203 | case IR::Opcode::LoadSharedU8: | ||
| 204 | case IR::Opcode::LoadSharedS8: | ||
| 205 | case IR::Opcode::WriteSharedU8: | ||
| 203 | case IR::Opcode::SelectU8: | 206 | case IR::Opcode::SelectU8: |
| 204 | case IR::Opcode::ConvertF16S8: | 207 | case IR::Opcode::ConvertF16S8: |
| 205 | case IR::Opcode::ConvertF16U8: | 208 | case IR::Opcode::ConvertF16U8: |
| @@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 224 | case IR::Opcode::LoadStorageS16: | 227 | case IR::Opcode::LoadStorageS16: |
| 225 | case IR::Opcode::WriteStorageU16: | 228 | case IR::Opcode::WriteStorageU16: |
| 226 | case IR::Opcode::WriteStorageS16: | 229 | case IR::Opcode::WriteStorageS16: |
| 230 | case IR::Opcode::LoadSharedU16: | ||
| 231 | case IR::Opcode::LoadSharedS16: | ||
| 232 | case IR::Opcode::WriteSharedU16: | ||
| 227 | case IR::Opcode::SelectU16: | 233 | case IR::Opcode::SelectU16: |
| 228 | case IR::Opcode::BitCastU16F16: | 234 | case IR::Opcode::BitCastU16F16: |
| 229 | case IR::Opcode::BitCastF16U16: | 235 | case IR::Opcode::BitCastF16U16: |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e26047751..0276fc23b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -18,6 +18,8 @@ enum class AttributeType : u8 { | |||
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | struct Profile { | 20 | struct Profile { |
| 21 | u32 supported_spirv{0x00010000}; | ||
| 22 | |||
| 21 | bool unified_descriptor_binding{}; | 23 | bool unified_descriptor_binding{}; |
| 22 | bool support_vertex_instance_id{}; | 24 | bool support_vertex_instance_id{}; |
| 23 | bool support_float_controls{}; | 25 | bool support_float_controls{}; |
| @@ -30,6 +32,7 @@ struct Profile { | |||
| 30 | bool support_fp16_signed_zero_nan_preserve{}; | 32 | bool support_fp16_signed_zero_nan_preserve{}; |
| 31 | bool support_fp32_signed_zero_nan_preserve{}; | 33 | bool support_fp32_signed_zero_nan_preserve{}; |
| 32 | bool support_fp64_signed_zero_nan_preserve{}; | 34 | bool support_fp64_signed_zero_nan_preserve{}; |
| 35 | bool support_explicit_workgroup_layout{}; | ||
| 33 | bool support_vote{}; | 36 | bool support_vote{}; |
| 34 | bool warp_size_potentially_larger_than_guest{}; | 37 | bool warp_size_potentially_larger_than_guest{}; |
| 35 | 38 | ||