diff options
| author | 2019-05-26 01:43:07 -0300 | |
|---|---|---|
| committer | 2019-05-26 01:48:04 -0300 | |
| commit | a4c5e3e339430134d9e1322622a7995bfeeee567 (patch) | |
| tree | 903d8cda1060e7fc1a42648c95bde61cbb64d1e3 | |
| parent | vk_device: Enable features when available and misc changes (diff) | |
| download | yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.tar.gz yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.tar.xz yuzu-a4c5e3e339430134d9e1322622a7995bfeeee567.zip | |
vk_shader_decompiler: Misc fixes
Fix missing OpSelectionMerge instruction. This caused device losses on
most hardware; Intel didn't care.
Fix [-1;1] -> [0;1] depth conversions.
Conditionally use VK_EXT_scalar_block_layout. This allows us to use
non-std140 layouts on UBOs.
Update external Vulkan headers.
| m--------- | externals/Vulkan-Headers | 0 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 8 |
3 files changed, 67 insertions, 45 deletions
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers | |||
| Subproject 15e5c4db7500b936ae758236f2e72fc1aec2202 | Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b61a6d170..2fb368014 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/engines/shader_bytecode.h" | 18 | #include "video_core/engines/shader_bytecode.h" |
| 19 | #include "video_core/engines/shader_header.h" | 19 | #include "video_core/engines/shader_header.h" |
| 20 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 21 | #include "video_core/shader/shader_ir.h" | 22 | #include "video_core/shader/shader_ir.h" |
| 22 | 23 | ||
| @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | |||
| 33 | using Operation = const OperationNode&; | 34 | using Operation = const OperationNode&; |
| 34 | 35 | ||
| 35 | // TODO(Rodrigo): Use rasterizer's value | 36 | // TODO(Rodrigo): Use rasterizer's value |
| 36 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; | 37 | constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; |
| 38 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; | ||
| 37 | constexpr u32 STAGE_BINDING_STRIDE = 0x100; | 39 | constexpr u32 STAGE_BINDING_STRIDE = 0x100; |
| 38 | 40 | ||
| 39 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 41 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { | |||
| 87 | 89 | ||
| 88 | class SPIRVDecompiler : public Sirit::Module { | 90 | class SPIRVDecompiler : public Sirit::Module { |
| 89 | public: | 91 | public: |
| 90 | explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) | 92 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) |
| 91 | : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { | 93 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { |
| 92 | AddCapability(spv::Capability::Shader); | 94 | AddCapability(spv::Capability::Shader); |
| 93 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | 95 | AddExtension("SPV_KHR_storage_buffer_storage_class"); |
| 94 | AddExtension("SPV_KHR_variable_pointers"); | 96 | AddExtension("SPV_KHR_variable_pointers"); |
| @@ -195,7 +197,9 @@ public: | |||
| 195 | entries.samplers.emplace_back(sampler); | 197 | entries.samplers.emplace_back(sampler); |
| 196 | } | 198 | } |
| 197 | for (const auto& attribute : ir.GetInputAttributes()) { | 199 | for (const auto& attribute : ir.GetInputAttributes()) { |
| 198 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | 200 | if (IsGenericAttribute(attribute)) { |
| 201 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||
| 202 | } | ||
| 199 | } | 203 | } |
| 200 | entries.clip_distances = ir.GetClipDistances(); | 204 | entries.clip_distances = ir.GetClipDistances(); |
| 201 | entries.shader_length = ir.GetLength(); | 205 | entries.shader_length = ir.GetLength(); |
| @@ -210,7 +214,6 @@ private: | |||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | 214 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; |
| 211 | 215 | ||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 216 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | static constexpr u32 CBUF_STRIDE = 16; | ||
| 214 | 217 | ||
| 215 | void AllocateBindings() { | 218 | void AllocateBindings() { |
| 216 | const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; | 219 | const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; |
| @@ -315,6 +318,7 @@ private: | |||
| 315 | constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", | 318 | constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", |
| 316 | "overflow"}; | 319 | "overflow"}; |
| 317 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { | 320 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { |
| 321 | const auto flag_code = static_cast<InternalFlag>(flag); | ||
| 318 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | 322 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); |
| 319 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); | 323 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); |
| 320 | } | 324 | } |
| @@ -374,7 +378,9 @@ private: | |||
| 374 | u32 binding = const_buffers_base_binding; | 378 | u32 binding = const_buffers_base_binding; |
| 375 | for (const auto& entry : ir.GetConstantBuffers()) { | 379 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 376 | const auto [index, size] = entry; | 380 | const auto [index, size] = entry; |
| 377 | const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); | 381 | const Id type = |
| 382 | device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; | ||
| 383 | const Id id = OpVariable(type, spv::StorageClass::Uniform); | ||
| 378 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | 384 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); |
| 379 | 385 | ||
| 380 | Decorate(id, spv::Decoration::Binding, binding++); | 386 | Decorate(id, spv::Decoration::Binding, binding++); |
| @@ -569,33 +575,35 @@ private: | |||
| 569 | const Node offset = cbuf->GetOffset(); | 575 | const Node offset = cbuf->GetOffset(); |
| 570 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | 576 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); |
| 571 | 577 | ||
| 572 | Id buffer_index{}; | 578 | Id pointer{}; |
| 573 | Id buffer_element{}; | 579 | if (device.IsExtScalarBlockLayoutSupported()) { |
| 574 | 580 | const Id buffer_offset = Emit(OpShiftRightLogical( | |
| 575 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | 581 | t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); |
| 576 | // Direct access | 582 | pointer = Emit( |
| 577 | const u32 offset_imm = immediate->GetValue(); | 583 | OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); |
| 578 | ASSERT(offset_imm % 4 == 0); | ||
| 579 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 580 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 581 | |||
| 582 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 583 | // Indirect access | ||
| 584 | // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which | ||
| 585 | // emits sub-optimal code on GLSL from my testing). | ||
| 586 | const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||
| 587 | const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||
| 588 | const Id final_offset = Emit( | ||
| 589 | OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||
| 590 | buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 591 | buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 592 | |||
| 593 | } else { | 584 | } else { |
| 594 | UNREACHABLE_MSG("Unmanaged offset node type"); | 585 | Id buffer_index{}; |
| 586 | Id buffer_element{}; | ||
| 587 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | ||
| 588 | // Direct access | ||
| 589 | const u32 offset_imm = immediate->GetValue(); | ||
| 590 | ASSERT(offset_imm % 4 == 0); | ||
| 591 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 592 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 593 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 594 | // Indirect access | ||
| 595 | const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||
| 596 | const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||
| 597 | const Id final_offset = Emit(OpUMod( | ||
| 598 | t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||
| 599 | buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 600 | buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 601 | } else { | ||
| 602 | UNREACHABLE_MSG("Unmanaged offset node type"); | ||
| 603 | } | ||
| 604 | pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||
| 605 | buffer_index, buffer_element)); | ||
| 595 | } | 606 | } |
| 596 | |||
| 597 | const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||
| 598 | buffer_index, buffer_element)); | ||
| 599 | return Emit(OpLoad(t_float, pointer)); | 607 | return Emit(OpLoad(t_float, pointer)); |
| 600 | 608 | ||
| 601 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { | 609 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { |
| @@ -612,7 +620,9 @@ private: | |||
| 612 | // It's invalid to call conditional on nested nodes, use an operation instead | 620 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 613 | const Id true_label = OpLabel(); | 621 | const Id true_label = OpLabel(); |
| 614 | const Id skip_label = OpLabel(); | 622 | const Id skip_label = OpLabel(); |
| 615 | Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); | 623 | const Id condition = Visit(conditional->GetCondition()); |
| 624 | Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); | ||
| 625 | Emit(OpBranchConditional(condition, true_label, skip_label)); | ||
| 616 | Emit(true_label); | 626 | Emit(true_label); |
| 617 | 627 | ||
| 618 | VisitBasicBlock(conditional->GetCode()); | 628 | VisitBasicBlock(conditional->GetCode()); |
| @@ -968,11 +978,11 @@ private: | |||
| 968 | case ShaderStage::Vertex: { | 978 | case ShaderStage::Vertex: { |
| 969 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't | 979 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't |
| 970 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. | 980 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. |
| 971 | const Id position = AccessElement(t_float4, per_vertex, position_index); | 981 | const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); |
| 972 | Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); | 982 | Id depth = Emit(OpLoad(t_float, z_pointer)); |
| 973 | depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); | 983 | depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); |
| 974 | depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); | 984 | depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); |
| 975 | Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); | 985 | Emit(OpStore(z_pointer, depth)); |
| 976 | break; | 986 | break; |
| 977 | } | 987 | } |
| 978 | case ShaderStage::Fragment: { | 988 | case ShaderStage::Fragment: { |
| @@ -1293,6 +1303,7 @@ private: | |||
| 1293 | &SPIRVDecompiler::YNegate, | 1303 | &SPIRVDecompiler::YNegate, |
| 1294 | }; | 1304 | }; |
| 1295 | 1305 | ||
| 1306 | const VKDevice& device; | ||
| 1296 | const ShaderIR& ir; | 1307 | const ShaderIR& ir; |
| 1297 | const ShaderStage stage; | 1308 | const ShaderStage stage; |
| 1298 | const Tegra::Shader::Header header; | 1309 | const Tegra::Shader::Header header; |
| @@ -1331,12 +1342,18 @@ private: | |||
| 1331 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); | 1342 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); |
| 1332 | 1343 | ||
| 1333 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); | 1344 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); |
| 1334 | const Id t_cbuf_array = | 1345 | const Id t_cbuf_std140 = Decorate( |
| 1335 | Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), | 1346 | Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), |
| 1336 | spv::Decoration::ArrayStride, CBUF_STRIDE); | 1347 | spv::Decoration::ArrayStride, 16u); |
| 1337 | const Id t_cbuf_struct = MemberDecorate( | 1348 | const Id t_cbuf_scalar = Decorate( |
| 1338 | Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 1349 | Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), |
| 1339 | const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); | 1350 | spv::Decoration::ArrayStride, 4u); |
| 1351 | const Id t_cbuf_std140_struct = MemberDecorate( | ||
| 1352 | Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 1353 | const Id t_cbuf_scalar_struct = MemberDecorate( | ||
| 1354 | Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 1355 | const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); | ||
| 1356 | const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); | ||
| 1340 | 1357 | ||
| 1341 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | 1358 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); |
| 1342 | const Id t_gmem_array = | 1359 | const Id t_gmem_array = |
| @@ -1385,8 +1402,9 @@ private: | |||
| 1385 | std::map<u32, Id> labels; | 1402 | std::map<u32, Id> labels; |
| 1386 | }; | 1403 | }; |
| 1387 | 1404 | ||
| 1388 | DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { | 1405 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 1389 | auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); | 1406 | Maxwell::ShaderStage stage) { |
| 1407 | auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); | ||
| 1390 | decompiler->Decompile(); | 1408 | decompiler->Decompile(); |
| 1391 | return {std::move(decompiler), decompiler->GetShaderEntries()}; | 1409 | return {std::move(decompiler), decompiler->GetShaderEntries()}; |
| 1392 | } | 1410 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { | |||
| 20 | class ShaderIR; | 20 | class ShaderIR; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | namespace Vulkan { | ||
| 24 | class VKDevice; | ||
| 25 | } | ||
| 26 | |||
| 23 | namespace Vulkan::VKShader { | 27 | namespace Vulkan::VKShader { |
| 24 | 28 | ||
| 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 29 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 26 | |||
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 30 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 28 | 31 | ||
| 29 | constexpr u32 DESCRIPTOR_SET = 0; | 32 | constexpr u32 DESCRIPTOR_SET = 0; |
| @@ -75,6 +78,7 @@ struct ShaderEntries { | |||
| 75 | 78 | ||
| 76 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; | 79 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; |
| 77 | 80 | ||
| 78 | DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); | 81 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 82 | Maxwell::ShaderStage stage); | ||
| 79 | 83 | ||
| 80 | } // namespace Vulkan::VKShader | 84 | } // namespace Vulkan::VKShader |