summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2019-05-26 01:43:07 -0300
committerGravatar ReinUsesLisp2019-05-26 01:48:04 -0300
commita4c5e3e339430134d9e1322622a7995bfeeee567 (patch)
tree903d8cda1060e7fc1a42648c95bde61cbb64d1e3
parentvk_device: Enable features when available and misc changes (diff)
downloadyuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.tar.gz
yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.tar.xz
yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.zip
vk_shader_decompiler: Misc fixes
Fix missing OpSelectionMerge instruction. This caused device losses on most hardware; Intel didn't care. Fix [-1;1] -> [0;1] depth conversions. Conditionally use VK_EXT_scalar_block_layout. This allows us to use non-std140 layouts on UBOs. Update external Vulkan headers.
m---------externals/Vulkan-Headers0
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp104
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h8
3 files changed, 67 insertions, 45 deletions
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202 Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b61a6d170..2fb368014 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -17,6 +17,7 @@
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/shader_bytecode.h" 18#include "video_core/engines/shader_bytecode.h"
19#include "video_core/engines/shader_header.h" 19#include "video_core/engines/shader_header.h"
20#include "video_core/renderer_vulkan/vk_device.h"
20#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 21#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
21#include "video_core/shader/shader_ir.h" 22#include "video_core/shader/shader_ir.h"
22 23
@@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
33using Operation = const OperationNode&; 34using Operation = const OperationNode&;
34 35
35// TODO(Rodrigo): Use rasterizer's value 36// TODO(Rodrigo): Use rasterizer's value
36constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; 37constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000;
38constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4;
37constexpr u32 STAGE_BINDING_STRIDE = 0x100; 39constexpr u32 STAGE_BINDING_STRIDE = 0x100;
38 40
39enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 41enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) {
87 89
88class SPIRVDecompiler : public Sirit::Module { 90class SPIRVDecompiler : public Sirit::Module {
89public: 91public:
90 explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) 92 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
91 : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { 93 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} {
92 AddCapability(spv::Capability::Shader); 94 AddCapability(spv::Capability::Shader);
93 AddExtension("SPV_KHR_storage_buffer_storage_class"); 95 AddExtension("SPV_KHR_storage_buffer_storage_class");
94 AddExtension("SPV_KHR_variable_pointers"); 96 AddExtension("SPV_KHR_variable_pointers");
@@ -195,7 +197,9 @@ public:
195 entries.samplers.emplace_back(sampler); 197 entries.samplers.emplace_back(sampler);
196 } 198 }
197 for (const auto& attribute : ir.GetInputAttributes()) { 199 for (const auto& attribute : ir.GetInputAttributes()) {
198 entries.attributes.insert(GetGenericAttributeLocation(attribute)); 200 if (IsGenericAttribute(attribute)) {
201 entries.attributes.insert(GetGenericAttributeLocation(attribute));
202 }
199 } 203 }
200 entries.clip_distances = ir.GetClipDistances(); 204 entries.clip_distances = ir.GetClipDistances();
201 entries.shader_length = ir.GetLength(); 205 entries.shader_length = ir.GetLength();
@@ -210,7 +214,6 @@ private:
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; 214 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211 215
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 216 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 static constexpr u32 CBUF_STRIDE = 16;
214 217
215 void AllocateBindings() { 218 void AllocateBindings() {
216 const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; 219 const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
@@ -315,6 +318,7 @@ private:
315 constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", 318 constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
316 "overflow"}; 319 "overflow"};
317 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { 320 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
321 const auto flag_code = static_cast<InternalFlag>(flag);
318 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); 322 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
319 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); 323 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
320 } 324 }
@@ -374,7 +378,9 @@ private:
374 u32 binding = const_buffers_base_binding; 378 u32 binding = const_buffers_base_binding;
375 for (const auto& entry : ir.GetConstantBuffers()) { 379 for (const auto& entry : ir.GetConstantBuffers()) {
376 const auto [index, size] = entry; 380 const auto [index, size] = entry;
377 const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); 381 const Id type =
382 device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
383 const Id id = OpVariable(type, spv::StorageClass::Uniform);
378 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); 384 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
379 385
380 Decorate(id, spv::Decoration::Binding, binding++); 386 Decorate(id, spv::Decoration::Binding, binding++);
@@ -569,33 +575,35 @@ private:
569 const Node offset = cbuf->GetOffset(); 575 const Node offset = cbuf->GetOffset();
570 const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); 576 const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
571 577
572 Id buffer_index{}; 578 Id pointer{};
573 Id buffer_element{}; 579 if (device.IsExtScalarBlockLayoutSupported()) {
574 580 const Id buffer_offset = Emit(OpShiftRightLogical(
575 if (const auto immediate = std::get_if<ImmediateNode>(offset)) { 581 t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
576 // Direct access 582 pointer = Emit(
577 const u32 offset_imm = immediate->GetValue(); 583 OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset));
578 ASSERT(offset_imm % 4 == 0);
579 buffer_index = Constant(t_uint, offset_imm / 16);
580 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
581
582 } else if (std::holds_alternative<OperationNode>(*offset)) {
583 // Indirect access
584 // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
585 // emits sub-optimal code on GLSL from my testing).
586 const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
587 const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
588 const Id final_offset = Emit(
589 OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
590 buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
591 buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
592
593 } else { 584 } else {
594 UNREACHABLE_MSG("Unmanaged offset node type"); 585 Id buffer_index{};
586 Id buffer_element{};
587 if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
588 // Direct access
589 const u32 offset_imm = immediate->GetValue();
590 ASSERT(offset_imm % 4 == 0);
591 buffer_index = Constant(t_uint, offset_imm / 16);
592 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
593 } else if (std::holds_alternative<OperationNode>(*offset)) {
594 // Indirect access
595 const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
596 const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
597 const Id final_offset = Emit(OpUMod(
598 t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
599 buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
600 buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
601 } else {
602 UNREACHABLE_MSG("Unmanaged offset node type");
603 }
604 pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
605 buffer_index, buffer_element));
595 } 606 }
596
597 const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
598 buffer_index, buffer_element));
599 return Emit(OpLoad(t_float, pointer)); 607 return Emit(OpLoad(t_float, pointer));
600 608
601 } else if (const auto gmem = std::get_if<GmemNode>(node)) { 609 } else if (const auto gmem = std::get_if<GmemNode>(node)) {
@@ -612,7 +620,9 @@ private:
612 // It's invalid to call conditional on nested nodes, use an operation instead 620 // It's invalid to call conditional on nested nodes, use an operation instead
613 const Id true_label = OpLabel(); 621 const Id true_label = OpLabel();
614 const Id skip_label = OpLabel(); 622 const Id skip_label = OpLabel();
615 Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); 623 const Id condition = Visit(conditional->GetCondition());
624 Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone));
625 Emit(OpBranchConditional(condition, true_label, skip_label));
616 Emit(true_label); 626 Emit(true_label);
617 627
618 VisitBasicBlock(conditional->GetCode()); 628 VisitBasicBlock(conditional->GetCode());
@@ -968,11 +978,11 @@ private:
968 case ShaderStage::Vertex: { 978 case ShaderStage::Vertex: {
969 // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't 979 // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
970 // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. 980 // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
971 const Id position = AccessElement(t_float4, per_vertex, position_index); 981 const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u);
972 Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); 982 Id depth = Emit(OpLoad(t_float, z_pointer));
973 depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); 983 depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
974 depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); 984 depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
975 Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); 985 Emit(OpStore(z_pointer, depth));
976 break; 986 break;
977 } 987 }
978 case ShaderStage::Fragment: { 988 case ShaderStage::Fragment: {
@@ -1293,6 +1303,7 @@ private:
1293 &SPIRVDecompiler::YNegate, 1303 &SPIRVDecompiler::YNegate,
1294 }; 1304 };
1295 1305
1306 const VKDevice& device;
1296 const ShaderIR& ir; 1307 const ShaderIR& ir;
1297 const ShaderStage stage; 1308 const ShaderStage stage;
1298 const Tegra::Shader::Header header; 1309 const Tegra::Shader::Header header;
@@ -1331,12 +1342,18 @@ private:
1331 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); 1342 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
1332 1343
1333 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); 1344 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
1334 const Id t_cbuf_array = 1345 const Id t_cbuf_std140 = Decorate(
1335 Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), 1346 Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"),
1336 spv::Decoration::ArrayStride, CBUF_STRIDE); 1347 spv::Decoration::ArrayStride, 16u);
1337 const Id t_cbuf_struct = MemberDecorate( 1348 const Id t_cbuf_scalar = Decorate(
1338 Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); 1349 Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"),
1339 const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); 1350 spv::Decoration::ArrayStride, 4u);
1351 const Id t_cbuf_std140_struct = MemberDecorate(
1352 Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
1353 const Id t_cbuf_scalar_struct = MemberDecorate(
1354 Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
1355 const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
1356 const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
1340 1357
1341 const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); 1358 const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
1342 const Id t_gmem_array = 1359 const Id t_gmem_array =
@@ -1385,8 +1402,9 @@ private:
1385 std::map<u32, Id> labels; 1402 std::map<u32, Id> labels;
1386}; 1403};
1387 1404
1388DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { 1405DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
1389 auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); 1406 Maxwell::ShaderStage stage) {
1407 auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
1390 decompiler->Decompile(); 1408 decompiler->Decompile();
1391 return {std::move(decompiler), decompiler->GetShaderEntries()}; 1409 return {std::move(decompiler), decompiler->GetShaderEntries()};
1392} 1410}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 329d8fa38..f90541cc1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -20,10 +20,13 @@ namespace VideoCommon::Shader {
20class ShaderIR; 20class ShaderIR;
21} 21}
22 22
23namespace Vulkan {
24class VKDevice;
25}
26
23namespace Vulkan::VKShader { 27namespace Vulkan::VKShader {
24 28
25using Maxwell = Tegra::Engines::Maxwell3D::Regs; 29using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26
27using SamplerEntry = VideoCommon::Shader::Sampler; 30using SamplerEntry = VideoCommon::Shader::Sampler;
28 31
29constexpr u32 DESCRIPTOR_SET = 0; 32constexpr u32 DESCRIPTOR_SET = 0;
@@ -75,6 +78,7 @@ struct ShaderEntries {
75 78
76using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; 79using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
77 80
78DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); 81DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
82 Maxwell::ShaderStage stage);
79 83
80} // namespace Vulkan::VKShader 84} // namespace Vulkan::VKShader