diff options
| author | 2019-05-26 01:43:07 -0300 | |
|---|---|---|
| committer | 2019-05-26 01:48:04 -0300 | |
| commit | a4c5e3e339430134d9e1322622a7995bfeeee567 (patch) | |
| tree | 903d8cda1060e7fc1a42648c95bde61cbb64d1e3 | |
| parent | vk_device: Enable features when available and misc changes (diff) | |
| download | yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.tar.gz yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.tar.xz yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.zip | |
vk_shader_decompiler: Misc fixes
Fix missing OpSelectionMerge instruction. This caused device losses on
most hardware; Intel didn't care.
Fix [-1;1] -> [0;1] depth conversions.
Conditionally use VK_EXT_scalar_block_layout. This allows us to use
non-std140 layouts on UBOs.
Update external Vulkan headers.
| m--------- | externals/Vulkan-Headers | 0 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 8 |
3 files changed, 67 insertions, 45 deletions
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers | |||
| Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202 | Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b61a6d170..2fb368014 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/engines/shader_bytecode.h" | 18 | #include "video_core/engines/shader_bytecode.h" |
| 19 | #include "video_core/engines/shader_header.h" | 19 | #include "video_core/engines/shader_header.h" |
| 20 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 21 | #include "video_core/shader/shader_ir.h" | 22 | #include "video_core/shader/shader_ir.h" |
| 22 | 23 | ||
| @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | |||
| 33 | using Operation = const OperationNode&; | 34 | using Operation = const OperationNode&; |
| 34 | 35 | ||
| 35 | // TODO(Rodrigo): Use rasterizer's value | 36 | // TODO(Rodrigo): Use rasterizer's value |
| 36 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; | 37 | constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; |
| 38 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; | ||
| 37 | constexpr u32 STAGE_BINDING_STRIDE = 0x100; | 39 | constexpr u32 STAGE_BINDING_STRIDE = 0x100; |
| 38 | 40 | ||
| 39 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 41 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { | |||
| 87 | 89 | ||
| 88 | class SPIRVDecompiler : public Sirit::Module { | 90 | class SPIRVDecompiler : public Sirit::Module { |
| 89 | public: | 91 | public: |
| 90 | explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) | 92 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) |
| 91 | : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { | 93 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { |
| 92 | AddCapability(spv::Capability::Shader); | 94 | AddCapability(spv::Capability::Shader); |
| 93 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | 95 | AddExtension("SPV_KHR_storage_buffer_storage_class"); |
| 94 | AddExtension("SPV_KHR_variable_pointers"); | 96 | AddExtension("SPV_KHR_variable_pointers"); |
| @@ -195,7 +197,9 @@ public: | |||
| 195 | entries.samplers.emplace_back(sampler); | 197 | entries.samplers.emplace_back(sampler); |
| 196 | } | 198 | } |
| 197 | for (const auto& attribute : ir.GetInputAttributes()) { | 199 | for (const auto& attribute : ir.GetInputAttributes()) { |
| 198 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | 200 | if (IsGenericAttribute(attribute)) { |
| 201 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||
| 202 | } | ||
| 199 | } | 203 | } |
| 200 | entries.clip_distances = ir.GetClipDistances(); | 204 | entries.clip_distances = ir.GetClipDistances(); |
| 201 | entries.shader_length = ir.GetLength(); | 205 | entries.shader_length = ir.GetLength(); |
| @@ -210,7 +214,6 @@ private: | |||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | 214 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; |
| 211 | 215 | ||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 216 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | static constexpr u32 CBUF_STRIDE = 16; | ||
| 214 | 217 | ||
| 215 | void AllocateBindings() { | 218 | void AllocateBindings() { |
| 216 | const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; | 219 | const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; |
| @@ -315,6 +318,7 @@ private: | |||
| 315 | constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", | 318 | constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", |
| 316 | "overflow"}; | 319 | "overflow"}; |
| 317 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { | 320 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { |
| 321 | const auto flag_code = static_cast<InternalFlag>(flag); | ||
| 318 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | 322 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); |
| 319 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); | 323 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); |
| 320 | } | 324 | } |
| @@ -374,7 +378,9 @@ private: | |||
| 374 | u32 binding = const_buffers_base_binding; | 378 | u32 binding = const_buffers_base_binding; |
| 375 | for (const auto& entry : ir.GetConstantBuffers()) { | 379 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 376 | const auto [index, size] = entry; | 380 | const auto [index, size] = entry; |
| 377 | const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); | 381 | const Id type = |
| 382 | device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; | ||
| 383 | const Id id = OpVariable(type, spv::StorageClass::Uniform); | ||
| 378 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | 384 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); |
| 379 | 385 | ||
| 380 | Decorate(id, spv::Decoration::Binding, binding++); | 386 | Decorate(id, spv::Decoration::Binding, binding++); |
| @@ -569,33 +575,35 @@ private: | |||
| 569 | const Node offset = cbuf->GetOffset(); | 575 | const Node offset = cbuf->GetOffset(); |
| 570 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | 576 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); |
| 571 | 577 | ||
| 572 | Id buffer_index{}; | 578 | Id pointer{}; |
| 573 | Id buffer_element{}; | 579 | if (device.IsExtScalarBlockLayoutSupported()) { |
| 574 | 580 | const Id buffer_offset = Emit(OpShiftRightLogical( | |
| 575 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | 581 | t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); |
| 576 | // Direct access | 582 | pointer = Emit( |
| 577 | const u32 offset_imm = immediate->GetValue(); | 583 | OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); |
| 578 | ASSERT(offset_imm % 4 == 0); | ||
| 579 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 580 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 581 | |||
| 582 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 583 | // Indirect access | ||
| 584 | // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which | ||
| 585 | // emits sub-optimal code on GLSL from my testing). | ||
| 586 | const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||
| 587 | const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||
| 588 | const Id final_offset = Emit( | ||
| 589 | OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||
| 590 | buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 591 | buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 592 | |||
| 593 | } else { | 584 | } else { |
| 594 | UNREACHABLE_MSG("Unmanaged offset node type"); | 585 | Id buffer_index{}; |
| 586 | Id buffer_element{}; | ||
| 587 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | ||
| 588 | // Direct access | ||
| 589 | const u32 offset_imm = immediate->GetValue(); | ||
| 590 | ASSERT(offset_imm % 4 == 0); | ||
| 591 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 592 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 593 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 594 | // Indirect access | ||
| 595 | const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||
| 596 | const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||
| 597 | const Id final_offset = Emit(OpUMod( | ||
| 598 | t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||
| 599 | buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 600 | buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 601 | } else { | ||
| 602 | UNREACHABLE_MSG("Unmanaged offset node type"); | ||
| 603 | } | ||
| 604 | pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||
| 605 | buffer_index, buffer_element)); | ||
| 595 | } | 606 | } |
| 596 | |||
| 597 | const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||
| 598 | buffer_index, buffer_element)); | ||
| 599 | return Emit(OpLoad(t_float, pointer)); | 607 | return Emit(OpLoad(t_float, pointer)); |
| 600 | 608 | ||
| 601 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { | 609 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { |
| @@ -612,7 +620,9 @@ private: | |||
| 612 | // It's invalid to call conditional on nested nodes, use an operation instead | 620 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 613 | const Id true_label = OpLabel(); | 621 | const Id true_label = OpLabel(); |
| 614 | const Id skip_label = OpLabel(); | 622 | const Id skip_label = OpLabel(); |
| 615 | Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); | 623 | const Id condition = Visit(conditional->GetCondition()); |
| 624 | Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); | ||
| 625 | Emit(OpBranchConditional(condition, true_label, skip_label)); | ||
| 616 | Emit(true_label); | 626 | Emit(true_label); |
| 617 | 627 | ||
| 618 | VisitBasicBlock(conditional->GetCode()); | 628 | VisitBasicBlock(conditional->GetCode()); |
| @@ -968,11 +978,11 @@ private: | |||
| 968 | case ShaderStage::Vertex: { | 978 | case ShaderStage::Vertex: { |
| 969 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't | 979 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't |
| 970 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. | 980 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. |
| 971 | const Id position = AccessElement(t_float4, per_vertex, position_index); | 981 | const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); |
| 972 | Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); | 982 | Id depth = Emit(OpLoad(t_float, z_pointer)); |
| 973 | depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); | 983 | depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); |
| 974 | depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); | 984 | depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); |
| 975 | Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); | 985 | Emit(OpStore(z_pointer, depth)); |
| 976 | break; | 986 | break; |
| 977 | } | 987 | } |
| 978 | case ShaderStage::Fragment: { | 988 | case ShaderStage::Fragment: { |
| @@ -1293,6 +1303,7 @@ private: | |||
| 1293 | &SPIRVDecompiler::YNegate, | 1303 | &SPIRVDecompiler::YNegate, |
| 1294 | }; | 1304 | }; |
| 1295 | 1305 | ||
| 1306 | const VKDevice& device; | ||
| 1296 | const ShaderIR& ir; | 1307 | const ShaderIR& ir; |
| 1297 | const ShaderStage stage; | 1308 | const ShaderStage stage; |
| 1298 | const Tegra::Shader::Header header; | 1309 | const Tegra::Shader::Header header; |
| @@ -1331,12 +1342,18 @@ private: | |||
| 1331 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); | 1342 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); |
| 1332 | 1343 | ||
| 1333 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); | 1344 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); |
| 1334 | const Id t_cbuf_array = | 1345 | const Id t_cbuf_std140 = Decorate( |
| 1335 | Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), | 1346 | Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), |
| 1336 | spv::Decoration::ArrayStride, CBUF_STRIDE); | 1347 | spv::Decoration::ArrayStride, 16u); |
| 1337 | const Id t_cbuf_struct = MemberDecorate( | 1348 | const Id t_cbuf_scalar = Decorate( |
| 1338 | Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 1349 | Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), |
| 1339 | const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); | 1350 | spv::Decoration::ArrayStride, 4u); |
| 1351 | const Id t_cbuf_std140_struct = MemberDecorate( | ||
| 1352 | Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 1353 | const Id t_cbuf_scalar_struct = MemberDecorate( | ||
| 1354 | Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 1355 | const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); | ||
| 1356 | const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); | ||
| 1340 | 1357 | ||
| 1341 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | 1358 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); |
| 1342 | const Id t_gmem_array = | 1359 | const Id t_gmem_array = |
| @@ -1385,8 +1402,9 @@ private: | |||
| 1385 | std::map<u32, Id> labels; | 1402 | std::map<u32, Id> labels; |
| 1386 | }; | 1403 | }; |
| 1387 | 1404 | ||
| 1388 | DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { | 1405 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 1389 | auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); | 1406 | Maxwell::ShaderStage stage) { |
| 1407 | auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); | ||
| 1390 | decompiler->Decompile(); | 1408 | decompiler->Decompile(); |
| 1391 | return {std::move(decompiler), decompiler->GetShaderEntries()}; | 1409 | return {std::move(decompiler), decompiler->GetShaderEntries()}; |
| 1392 | } | 1410 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { | |||
| 20 | class ShaderIR; | 20 | class ShaderIR; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | namespace Vulkan { | ||
| 24 | class VKDevice; | ||
| 25 | } | ||
| 26 | |||
| 23 | namespace Vulkan::VKShader { | 27 | namespace Vulkan::VKShader { |
| 24 | 28 | ||
| 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 29 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 26 | |||
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 30 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 28 | 31 | ||
| 29 | constexpr u32 DESCRIPTOR_SET = 0; | 32 | constexpr u32 DESCRIPTOR_SET = 0; |
| @@ -75,6 +78,7 @@ struct ShaderEntries { | |||
| 75 | 78 | ||
| 76 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; | 79 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; |
| 77 | 80 | ||
| 78 | DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); | 81 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 82 | Maxwell::ShaderStage stage); | ||
| 79 | 83 | ||
| 80 | } // namespace Vulkan::VKShader | 84 | } // namespace Vulkan::VKShader |