From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- .../renderer_vulkan/vk_shader_decompiler.cpp | 3166 -------------------- .../renderer_vulkan/vk_shader_decompiler.h | 99 - 2 files changed, 3265 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null @@ -1,3166 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Vulkan { - -namespace { - -using Sirit::Id; -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -// TODO(Rodrigo): Use rasterizer's value -constexpr u32 MaxConstBufferFloats = 0x4000; -constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; - -constexpr u32 NumInputPatches = 32; // This value seems to be the standard - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -class Expression final { -public: - Expression(Id id_, Type type_) : id{id_}, type{type_} { - ASSERT(type_ != Type::Void); - } - Expression() : type{Type::Void} {} - - Id id{}; - Type type{}; -}; -static_assert(std::is_standard_layout_v); - -struct TexelBuffer { - Id image_type{}; - Id image{}; -}; - -struct SampledImage { - Id image_type{}; - Id sampler_type{}; - Id sampler_pointer_type{}; - Id variable{}; -}; - -struct StorageImage { - Id image_type{}; - Id image{}; -}; - -struct AttributeType { - Type type; - Id scalar; - Id vector; -}; - -struct VertexIndices { - std::optional position; - std::optional layer; - std::optional viewport; - std::optional point_size; - std::optional clip_distances; -}; - -struct GenericVaryingDescription { - Id id = nullptr; - u32 first_element = 0; - bool is_scalar = false; -}; - -spv::Dim GetSamplerDim(const SamplerEntry& sampler) { - ASSERT(!sampler.is_buffer); - switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return spv::Dim::Dim1D; - case Tegra::Shader::TextureType::Texture2D: - return spv::Dim::Dim2D; - case Tegra::Shader::TextureType::Texture3D: - return spv::Dim::Dim3D; - case Tegra::Shader::TextureType::TextureCube: - return spv::Dim::Cube; - default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); - return spv::Dim::Dim2D; - } -} - -std::pair GetImageDim(const ImageEntry& image) { - switch (image.type) { - case Tegra::Shader::ImageType::Texture1D: - return {spv::Dim::Dim1D, false}; - case Tegra::Shader::ImageType::TextureBuffer: - return {spv::Dim::Buffer, false}; - case Tegra::Shader::ImageType::Texture1DArray: - return {spv::Dim::Dim1D, true}; - case Tegra::Shader::ImageType::Texture2D: - return {spv::Dim::Dim2D, false}; - case Tegra::Shader::ImageType::Texture2DArray: - return {spv::Dim::Dim2D, true}; - case Tegra::Shader::ImageType::Texture3D: - return {spv::Dim::Dim3D, false}; - default: - UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); - return {spv::Dim::Dim2D, false}; - } -} - -/// Returns the number of vertices present in a primitive topology. -u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { - switch (primitive_topology) { - case Maxwell::PrimitiveTopology::Points: - return 1; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return 2; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return 3; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return 4; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return 6; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return 3; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return 3; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return 3; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return 3; - default: - UNREACHABLE(); - return 3; - } -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { - switch (primitive) { - case Maxwell::TessellationPrimitive::Isolines: - return spv::ExecutionMode::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return spv::ExecutionMode::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return spv::ExecutionMode::Quads; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { - switch (spacing) { - case Maxwell::TessellationSpacing::Equal: - return spv::ExecutionMode::SpacingEqual; - case Maxwell::TessellationSpacing::FractionalOdd: - return spv::ExecutionMode::SpacingFractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return spv::ExecutionMode::SpacingFractionalEven; - } - UNREACHABLE(); - return spv::ExecutionMode::SpacingEqual; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { - switch (input_topology) { - case Maxwell::PrimitiveTopology::Points: - return spv::ExecutionMode::InputPoints; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return spv::ExecutionMode::InputLines; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return spv::ExecutionMode::InputLinesAdjacency; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return spv::ExecutionMode::InputTrianglesAdjacency; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return spv::ExecutionMode::Triangles; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { - switch (output_topology) { - case Tegra::Shader::OutputTopology::PointList: - return spv::ExecutionMode::OutputPoints; - case Tegra::Shader::OutputTopology::LineStrip: - return spv::ExecutionMode::OutputLineStrip; - case Tegra::Shader::OutputTopology::TriangleStrip: - return spv::ExecutionMode::OutputTriangleStrip; - default: - UNREACHABLE(); - return spv::ExecutionMode::OutputPoints; - } -} - -/// Returns true if an attribute index is one of the 32 generic attributes -constexpr bool IsGenericAttribute(Attribute::Index attribute) { - return attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31; -} - -/// Returns the location of a generic attribute -u32 GetGenericAttributeLocation(Attribute::Index attribute) { - ASSERT(IsGenericAttribute(attribute)); - return static_cast(attribute) - static_cast(Attribute::Index::Attribute_0); -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (std::holds_alternative(meta)) { - return std::get(meta).precise; - } - return false; -} - -class SPIRVDecompiler final : public Sirit::Module { -public: - explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, - const Registry& registry_, const Specialization& specialization_) - : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, - registry{registry_}, specialization{specialization_} { - if (stage_ != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); - } - - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - AddCapability(spv::Capability::ImageQuery); - AddCapability(spv::Capability::Image1D); - AddCapability(spv::Capability::ImageBuffer); - AddCapability(spv::Capability::ImageGatherExtended); - AddCapability(spv::Capability::SampledBuffer); - AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - AddCapability(spv::Capability::DrawParameters); - AddCapability(spv::Capability::SubgroupBallotKHR); - AddCapability(spv::Capability::SubgroupVoteKHR); - AddExtension("SPV_KHR_16bit_storage"); - AddExtension("SPV_KHR_shader_ballot"); - AddExtension("SPV_KHR_subgroup_vote"); - AddExtension("SPV_KHR_storage_buffer_storage_class"); - AddExtension("SPV_KHR_variable_pointers"); - AddExtension("SPV_KHR_shader_draw_parameters"); - - if (!transform_feedback.empty()) { - if (device.IsExtTransformFeedbackSupported()) { - AddCapability(spv::Capability::TransformFeedback); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " - "supported on this device"); - } - } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - } - if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { - AddExtension("SPV_EXT_shader_viewport_index_layer"); - AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } - } - if (device.IsFormatlessImageLoadSupported()) { - AddCapability(spv::Capability::StorageImageReadWithoutFormat); - } - if (device.IsFloat16Supported()) { - AddCapability(spv::Capability::Float16); - } - t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half"); - t_half = Name(TypeVector(t_scalar_half, 2), "half"); - - const Id main = Decompile(); - - switch (stage) { - case ShaderType::Vertex: - AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); - break; - case ShaderType::TesselationControl: - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common2.threads_per_input_primitive); - break; - case ShaderType::TesselationEval: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); - AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); - AddExecutionMode(main, info.tessellation_clockwise - ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); - break; - } - case ShaderType::Geometry: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Geometry); - AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); - AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common4.max_output_vertices); - // TODO(Rodrigo): Where can we get this info from? - AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); - break; - } - case ShaderType::Fragment: - AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); - if (header.ps.omap.depth) { - AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - } - if (specialization.early_fragment_tests) { - AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); - } - break; - case ShaderType::Compute: - const auto workgroup_size = specialization.workgroup_size; - AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); - break; - } - } - -private: - Id Decompile() { - DeclareCommon(); - DeclareVertex(); - DeclareTessControl(); - DeclareTessEval(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - - u32 binding = specialization.base_binding; - binding = DeclareConstantBuffers(binding); - binding = DeclareGlobalBuffers(binding); - binding = DeclareUniformTexels(binding); - binding = DeclareSamplers(binding); - binding = DeclareStorageTexels(binding); - binding = DeclareImages(binding); - - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (ir.IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - return main; - } - - void DefinePrologue() { - if (stage == ShaderType::Vertex) { - // Clear Position to avoid reading trash on the Z conversion. - const auto position_index = out_indices.position.value(); - const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); - - if (specialization.point_size) { - const u32 point_size_index = out_indices.point_size.value(); - const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); - OpStore(out_point_size, Constant(t_float, *specialization.point_size)); - } - } - } - - void DecompileAST(); - - void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; - const Id loop_label = OpLabel("loop"); - const Id merge_label = OpLabel("merge"); - const Id dummy_label = OpLabel(); - const Id jump_label = OpLabel(); - continue_label = OpLabel("continue"); - - std::vector literals; - std::vector branch_labels; - for (const auto& [literal, label] : labels) { - literals.push_back(literal); - branch_labels.push_back(label); - } - - jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), - spv::StorageClass::Function, Constant(t_uint, first_address)); - AddLocalVariable(jmp_to); - - std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); - std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - - Name(jmp_to, "jmp_to"); - Name(ssy_flow_stack, "ssy_flow_stack"); - Name(ssy_flow_stack_top, "ssy_flow_stack_top"); - Name(pbk_flow_stack, "pbk_flow_stack"); - Name(pbk_flow_stack_top, "pbk_flow_stack_top"); - - DefinePrologue(); - - OpBranch(loop_label); - AddLabel(loop_label); - OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); - OpBranch(dummy_label); - - AddLabel(dummy_label); - const Id default_branch = OpLabel(); - const Id jmp_to_load = OpLoad(t_uint, jmp_to); - OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); - OpSwitch(jmp_to_load, default_branch, literals, branch_labels); - - AddLabel(default_branch); - OpReturn(); - - for (const auto& [address, bb] : ir.GetBasicBlocks()) { - AddLabel(labels.at(address)); - - VisitBasicBlock(bb); - - const auto next_it = labels.lower_bound(address + 1); - const Id next_label = next_it != labels.end() ? next_it->second : default_branch; - OpBranch(next_label); - } - - AddLabel(jump_label); - OpBranch(continue_label); - AddLabel(continue_label); - OpBranch(loop_label); - AddLabel(merge_label); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); - - void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { - const u32 address = pair.first; - labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); - } - } - - void DeclareCommon() { - thread_id = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); - thread_masks[0] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); - thread_masks[1] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); - thread_masks[2] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); - thread_masks[3] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); - thread_masks[4] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - - // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); - instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); - base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); - base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); - } - - void DeclareTessControl() { - if (stage != ShaderType::TesselationControl) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertexArray(header.common2.threads_per_input_primitive); - - tess_level_outer = DeclareBuiltIn( - spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), - "tess_level_outer"); - Decorate(tess_level_outer, spv::Decoration::Patch); - - tess_level_inner = DeclareBuiltIn( - spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), - "tess_level_inner"); - Decorate(tess_level_inner, spv::Decoration::Patch); - - invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); - } - - void DeclareTessEval() { - if (stage != ShaderType::TesselationEval) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertex(); - - tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); - DeclareInputVertexArray(num_input); - DeclareOutputVertex(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - - for (u32 rt = 0; rt < static_cast(std::size(frag_colors)); ++rt) { - if (!IsRenderTargetEnabled(rt)) { - continue; - } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); - Name(id, fmt::format("frag_color{}", rt)); - Decorate(id, spv::Decoration::Location, rt); - - frag_colors[rt] = id; - interfaces.push_back(id); - } - - if (header.ps.omap.depth) { - frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); - Name(frag_depth, "frag_depth"); - Decorate(frag_depth, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::FragDepth)); - - interfaces.push_back(frag_depth); - } - - frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); - front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); - point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - - workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); - local_invocation_id = - DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); - } - - void DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("gpr_{}", gpr)); - registers.emplace(gpr, AddGlobalVariable(id)); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("custom_var_{}", i)); - custom_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclarePredicates() { - for (const auto pred : ir.GetPredicates()) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("pred_{}", static_cast(pred))); - predicates.emplace(pred, AddGlobalVariable(id)); - } - } - - void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", static_cast(i))); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); - if (lmem_size == 0) { - return; - } - const auto element_count = static_cast(Common::AlignUp(lmem_size, 4) / 4); - const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); - const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); - Name(type_pointer, "LocalMemory"); - - local_memory = - OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); - AddGlobalVariable(Name(local_memory, "local_memory")); - } - - void DeclareSharedMemory() { - if (stage != ShaderType::Compute) { - return; - } - t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - - u32 smem_size = specialization.shared_memory_size * 4; - if (smem_size == 0) { - // Avoid declaring an empty array. - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (smem_size > limit) { - LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", - smem_size, limit); - smem_size = limit; - } - - const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); - const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); - Name(type_pointer, "SharedMemory"); - - shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); - AddGlobalVariable(Name(shared_memory, "shared_memory")); - } - - void DeclareInternalFlags() { - static constexpr std::array names{"zero", "sign", "carry", "overflow"}; - - for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); - } - } - - void DeclareInputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Input; - std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); - } - - void DeclareOutputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Output; - std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); - } - - std::tuple DeclareVertexArray(spv::StorageClass storage_class, - std::string name, u32 length) { - const auto [struct_id, indices] = DeclareVertexStruct(); - const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); - const Id vertex_ptr = TypePointer(storage_class, vertex_array); - const Id vertex = OpVariable(vertex_ptr, storage_class); - AddGlobalVariable(Name(vertex, std::move(name))); - interfaces.push_back(vertex); - return {indices, vertex}; - } - - void DeclareOutputVertex() { - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - } - - void DeclareInputAttributes() { - for (const auto index : ir.GetInputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - const u32 location = GetGenericAttributeLocation(index); - if (!IsAttributeEnabled(location)) { - continue; - } - const auto type_descriptor = GetAttributeType(location); - Id type; - if (IsInputAttributeArray()) { - type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); - type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); - type = TypePointer(spv::StorageClass::Input, type); - } else { - type = type_descriptor.vector; - } - const Id id = OpVariable(type, spv::StorageClass::Input); - AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); - input_attributes.emplace(index, id); - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - - if (stage != ShaderType::Fragment) { - continue; - } - switch (header.ps.GetPixelImap(location)) { - case PixelImap::Constant: - Decorate(id, spv::Decoration::Flat); - break; - case PixelImap::Perspective: - // Default - break; - case PixelImap::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - default: - UNREACHABLE_MSG("Unused attribute being fetched"); - } - } - } - - void DeclareOutputAttributes() { - if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { - return; - } - - UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); - for (const auto index : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - DeclareOutputAttribute(index); - } - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - - const u32 location = GetGenericAttributeLocation(index); - u8 element = 0; - while (element < 4) { - const std::size_t remainder = 4 - element; - - std::size_t num_components = remainder; - const std::optional tfb = GetTransformFeedbackInfo(index, element); - if (tfb) { - num_components = tfb->components; - } - - Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = v_varying_default; - if (IsOutputAttributeArray()) { - const u32 num = GetNumOutputVertices(); - type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { - // Intel's proprietary driver fails to setup defaults for arrayed output - // attributes. - varying_default = ConstantComposite(type, std::vector(num, varying_default)); - } - } - type = TypePointer(spv::StorageClass::Output, type); - - std::string name = fmt::format("out_attr{}", location); - if (num_components < 4 || element > 0) { - name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); - } - - const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), name); - - GenericVaryingDescription description; - description.id = id; - description.first_element = element; - description.is_scalar = num_components == 1; - for (u32 i = 0; i < num_components; ++i) { - const u8 offset = static_cast(static_cast(index) * 4 + element + i); - output_attributes.emplace(offset, description); - } - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - if (element > 0) { - Decorate(id, spv::Decoration::Component, static_cast(element)); - } - if (tfb && device.IsExtTransformFeedbackSupported()) { - Decorate(id, spv::Decoration::XfbBuffer, static_cast(tfb->buffer)); - Decorate(id, spv::Decoration::XfbStride, static_cast(tfb->stride)); - Decorate(id, spv::Decoration::Offset, static_cast(tfb->offset)); - } - - element = static_cast(static_cast(element) + num_components); - } - } - - std::optional GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - return it->second; - } - - u32 DeclareConstantBuffers(u32 binding) { - for (const auto& [index, size] : ir.GetConstantBuffers()) { - const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo - : t_cbuf_std140_ubo; - const Id id = OpVariable(type, spv::StorageClass::Uniform); - AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - constant_buffers.emplace(index, id); - } - return binding; - } - - u32 DeclareGlobalBuffers(u32 binding) { - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); - AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(base, id); - } - return binding; - } - - u32 DeclareUniformTexels(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (!sampler.is_buffer) { - continue; - } - ASSERT(!sampler.is_array); - ASSERT(!sampler.is_shadow); - - constexpr auto dim = spv::Dim::Buffer; - constexpr int depth = 0; - constexpr int arrayed = 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); - } - return binding; - } - - u32 DeclareSamplers(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - continue; - } - const auto dim = GetSamplerDim(sampler); - const int depth = sampler.is_shadow ? 1 : 0; - const int arrayed = sampler.is_array ? 1 : 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampler_type = TypeSampledImage(image_type); - const Id sampler_pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampler_type); - const Id type = sampler.is_indexed - ? TypeArray(sampler_type, Constant(t_uint, sampler.size)) - : sampler_type; - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - sampled_images.emplace( - sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); - } - return binding; - } - - u32 DeclareStorageTexels(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type != Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - u32 DeclareImages(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - void DeclareImage(const ImageEntry& image, u32& binding) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); - } - - images.emplace(image.index, StorageImage{image_type, id}); - } - - bool IsRenderTargetEnabled(u32 rt) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - return true; - } - } - return false; - } - - bool IsInputAttributeArray() const { - return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || - stage == ShaderType::Geometry; - } - - bool IsOutputAttributeArray() const { - return stage == ShaderType::TesselationControl; - } - - bool IsAttributeEnabled(u32 location) const { - return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; - } - - u32 GetNumInputVertices() const { - switch (stage) { - case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); - case ShaderType::TesselationControl: - case ShaderType::TesselationEval: - return NumInputPatches; - default: - UNREACHABLE(); - return 1; - } - } - - u32 GetNumOutputVertices() const { - switch (stage) { - case ShaderType::TesselationControl: - return header.common2.threads_per_input_primitive; - default: - UNREACHABLE(); - return 1; - } - } - - std::tuple DeclareVertexStruct() { - struct BuiltIn { - Id type; - spv::BuiltIn builtin; - const char* name; - }; - std::vector members; - members.reserve(4); - - const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { - const auto index = static_cast(members.size()); - members.push_back(BuiltIn{type, builtin, name}); - return index; - }; - - VertexIndices indices; - indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); - - if (ir.UsesLayer()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); - } else { - LOG_ERROR( - Render_Vulkan, - "Shader requires Layer but it's not supported on this stage with this device."); - } - } - - if (ir.UsesViewportIndex()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " - "this stage with this device."); - } - } - - if (ir.UsesPointSize() || specialization.point_size) { - indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); - } - - const auto& ir_output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = std::any_of( - ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { - return index == Attribute::Index::ClipDistances0123 || - index == Attribute::Index::ClipDistances4567; - }); - if (declare_clip_distances) { - indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), - spv::BuiltIn::ClipDistance, "clip_distances"); - } - - std::vector member_types; - member_types.reserve(members.size()); - for (std::size_t i = 0; i < members.size(); ++i) { - member_types.push_back(members[i].type); - } - const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); - Decorate(per_vertex_struct, spv::Decoration::Block); - - for (std::size_t index = 0; index < members.size(); ++index) { - const auto& member = members[index]; - MemberName(per_vertex_struct, static_cast(index), member.name); - MemberDecorate(per_vertex_struct, static_cast(index), spv::Decoration::BuiltIn, - static_cast(member.builtin)); - } - - return {per_vertex_struct, indices}; - } - - void VisitBasicBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - const auto operation_index = static_cast(operation->GetCode()); - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {v_float_zero, Type::Float}; - } - return {OpLoad(t_float, registers.at(index)), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; - } - - if (const auto immediate = std::get_if(&*node)) { - return {Constant(t_uint, immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> Id { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return v_true; - case Tegra::Shader::Pred::NeverExecute: - return v_false; - default: - return OpLoad(t_bool, predicates.at(index)); - } - }(); - if (predicate->IsNegated()) { - return {OpLogicalNot(t_bool, value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - const auto attribute = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const auto& buffer = abuf->GetBuffer(); - - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsInputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - switch (attribute) { - case Attribute::Index::Position: { - if (stage == ShaderType::Fragment) { - return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), - Type::Float}; - } - const std::vector elements = {in_indices.position.value(), element}; - return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; - } - case Attribute::Index::PointCoord: { - switch (element) { - case 0: - case 1: - return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), - Type::Float}; - } - UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); - return {v_float_zero, Type::Float}; - } - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - switch (element) { - case 0: - case 1: - return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), - Type::Float}; - case 2: - return { - OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), - Type::Int}; - case 3: - return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), - Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {Constant(t_uint, 0U), Type::Uint}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - if (element == 3) { - const Id is_front_facing = OpLoad(t_bool, front_facing); - const Id true_value = Constant(t_int, static_cast(-1)); - const Id false_value = Constant(t_int, 0); - return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {v_float_zero, Type::Float}; - default: - if (!IsGenericAttribute(attribute)) { - break; - } - const u32 location = GetGenericAttributeLocation(attribute); - if (!IsAttributeEnabled(location)) { - // Disabled attributes (also known as constant attributes) always return zero. - return {v_float_zero, Type::Float}; - } - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {v_float_zero, Type::Float}; - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node& offset = cbuf->GetOffset(); - const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - - Id pointer{}; - if (device.IsKhrUniformBufferStandardLayoutSupported()) { - const Id buffer_offset = - OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); - pointer = - OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); - } else { - Id buffer_index{}; - Id buffer_element{}; - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - } else if (std::holds_alternative(*offset)) { - // Indirect access - const Id offset_id = AsUint(Visit(offset)); - const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); - const Id final_offset = - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); - buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); - buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); - } - pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, - buffer_element); - } - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto gmem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_prv_float, local_memory, address); - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto smem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const Id flag = internal_flags.at(static_cast(internal_flag->GetFlag())); - return {OpLoad(t_bool, flag), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - // It's invalid to call conditional on nested nodes, use an operation instead - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id condition = AsBool(Visit(conditional->GetCondition())); - OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); - - conditional_branch_set = true; - inside_branch = false; - VisitBasicBlock(conditional->GetCode()); - conditional_branch_set = false; - if (!inside_branch) { - OpBranch(skip_label); - } else { - inside_branch = false; - } - AddLabel(skip_label); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - if (device.HasDebuggingToolAttached()) { - // We should insert comments with OpString instead of using named variables - Name(OpUndef(t_int), comment->GetText()); - } - return {}; - } - - UNREACHABLE(); - return {}; - } - - template - Expression Unary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - - const Id value = (this->*func)(type_def, op_a); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Binary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - - const Id value = (this->*func)(type_def, op_a, op_b); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Ternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Quaternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - const Id op_d = As(Visit(operation[3]), type_d); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target{}; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit its source - // because it might have side effects. - Visit(src); - return {}; - } - target = {registers.at(gpr->GetIndex()), Type::Float}; - - } else if (const auto abuf = std::get_if(&*dest)) { - const auto& buffer = abuf->GetBuffer(); - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsOutputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - target = [&]() -> Expression { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex(); attribute) { - case Attribute::Index::Position: { - const u32 index = out_indices.position.value(); - return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; - } - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 1: { - if (!out_indices.layer) { - return {}; - } - const u32 index = out_indices.layer.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 2: { - if (!out_indices.viewport) { - return {}; - } - const u32 index = out_indices.viewport.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 3: { - const auto index = out_indices.point_size.value(); - return {AccessElement(t_out_float, out_vertex, index), Type::Float}; - } - default: - UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); - return {}; - } - case Attribute::Index::ClipDistances0123: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; - } - case Attribute::Index::ClipDistances4567: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element + 4), - Type::Float}; - } - default: - if (IsGenericAttribute(attribute)) { - const u8 offset = static_cast(static_cast(attribute) * 4 + element); - const GenericVaryingDescription description = output_attributes.at(offset); - const Id composite = description.id; - std::vector indices; - if (!description.is_scalar) { - indices.push_back(element - description.first_element); - } - return {ArrayPass(t_out_float, composite, indices), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast(attribute)); - return {}; - } - }(); - - } else if (const auto patch = std::get_if(&*dest)) { - target = [&]() -> Expression { - const u32 offset = patch->GetOffset(); - switch (offset) { - case 0: - case 1: - case 2: - case 3: - return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; - case 4: - case 5: - return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); - return {}; - }(); - - } else if (const auto lmem = std::get_if(&*dest)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpUDiv(t_uint, address, Constant(t_uint, 4)); - target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; - - } else if (const auto smem = std::get_if(&*dest)) { - target = {GetSharedMemoryPointer(*smem), Type::Uint}; - - } else if (const auto gmem = std::get_if(&*dest)) { - target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - - } else if (const auto cv = std::get_if(&*dest)) { - target = {custom_variables.at(cv->GetIndex()), Type::Float}; - - } else { - UNIMPLEMENTED(); - } - - if (!target.id) { - // On failure we return a nullptr target.id, skip these stores. - return {}; - } - - OpStore(target.id, As(Visit(src), target.type)); - return {}; - } - - template - Expression FCastHalf(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), - Type::Float}; - } - - Expression FSwizzleAdd(Operation operation) { - const Id minus = Constant(t_float, -1.0f); - const Id plus = v_float_one; - const Id zero = v_float_zero; - const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); - const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); - - Id mask = OpLoad(t_uint, thread_id); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); - mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - - const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); - const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); - - const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); - const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); - return {OpFAdd(t_float, op_a, op_b), Type::Float}; - } - - Expression HNegate(Operation operation) { - const bool is_f16 = device.IsFloat16Supported(); - const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); - const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); - const auto GetNegate = [&](std::size_t index) { - return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); - }; - const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); - return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const auto Pack = [&](std::size_t index) { - const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); - return OpCompositeConstruct(t_half, scalar, scalar); - }; - const Id value = AsHalfFloat(Visit(operation[0])); - const Id min = Pack(1); - const Id max = Pack(2); - - const Id clamped = OpFClamp(t_half, value, min, max); - if (IsPrecise(operation)) { - Decorate(clamped, spv::Decoration::NoContraction); - } - return {clamped, Type::HalfFloat}; - } - - Expression HCastFloat(Operation operation) { - const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = Visit(operation[0]); - const auto type = std::get(operation.GetMeta()); - if (type == Tegra::Shader::HalfType::H0_H1) { - return operand; - } - const auto value = [&] { - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::F32: - return GetHalfScalarFromFloat(AsFloat(operand)); - case Tegra::Shader::HalfType::H0_H0: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); - case Tegra::Shader::HalfType::H1_H1: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); - default: - UNREACHABLE(); - return ConstantNull(t_half); - } - }(); - return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; - } - - Expression HMergeF32(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; - } - - template - Expression HMergeHN(Operation operation) { - const Id target = AsHalfFloat(Visit(operation[0])); - const Id source = AsHalfFloat(Visit(operation[1])); - const Id object = OpCompositeExtract(t_scalar_half, source, offset); - return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); - return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; - } - - Expression LogicalAddCarry(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - const Id op_b = AsUint(Visit(operation[1])); - - const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); - const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Id target{}; - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = predicates.at(index); - - } else if (const auto flag = std::get_if(&*dest)) { - target = internal_flags.at(static_cast(flag->GetFlag())); - } - - OpStore(target, AsBool(Visit(src))); - return {}; - } - - Expression LogicalFOrdered(Operation operation) { - // Emulate SPIR-V's OpOrdered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); - const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); - return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; - } - - Expression LogicalFUnordered(Operation operation) { - // Emulate SPIR-V's OpUnordered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_nan_a = OpIsNan(t_bool, op_a); - const Id is_nan_b = OpIsNan(t_bool, op_b); - return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; - } - - Id GetTextureSampler(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - ASSERT(!meta.sampler.is_buffer); - - const auto& entry = sampled_images.at(meta.sampler.index); - Id sampler = entry.variable; - if (meta.sampler.is_indexed) { - const Id index = AsInt(Visit(meta.index)); - sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); - } - return OpLoad(entry.sampler_type, sampler); - } - - Id GetTextureImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 index = meta.sampler.index; - if (meta.sampler.is_buffer) { - const auto& entry = uniform_texels.at(index); - return OpLoad(entry.image_type, entry.image); - } else { - const auto& entry = sampled_images.at(index); - return OpImage(entry.image_type, GetTextureSampler(operation)); - } - } - - Id GetImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto entry = images.at(meta.image.index); - return OpLoad(entry.image_type, entry.image); - } - - Id AssembleVector(const std::vector& coords, Type type) { - const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); - return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords); - } - - Id GetCoordinates(Operation operation, Type type) { - std::vector coords; - for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { - coords.push_back(As(Visit(operation[i]), type)); - } - if (const auto meta = std::get_if(&operation.GetMeta())) { - // Add array coordinate for textures - if (meta->sampler.is_array) { - Id array = AsInt(Visit(meta->array)); - if (type == Type::Float) { - array = OpConvertSToF(t_float, array); - } - coords.push_back(array); - } - } - return AssembleVector(coords, type); - } - - Id GetOffsetCoordinates(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::vector coords; - coords.reserve(meta.aoffi.size()); - for (const auto& coord : meta.aoffi) { - coords.push_back(AsInt(Visit(coord))); - } - return AssembleVector(coords, Type::Int); - } - - std::pair GetDerivatives(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto& derivatives = meta.derivates; - ASSERT(derivatives.size() % 2 == 0); - - const std::size_t components = derivatives.size() / 2; - std::vector dx, dy; - dx.reserve(components); - dy.reserve(components); - for (std::size_t index = 0; index < components; ++index) { - dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); - dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); - } - return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; - } - - Expression GetTextureElement(Operation operation, Id sample_value, Type type) { - const auto& meta = std::get(operation.GetMeta()); - const auto type_def = GetTypeDefinition(type); - return {OpCompositeExtract(type_def, sample_value, meta.element), type}; - } - - Expression Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const bool can_implicit = stage == ShaderType::Fragment; - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - - std::vector operands; - spv::ImageOperandsMask mask{}; - if (meta.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - operands.push_back(AsFloat(Visit(meta.bias))); - } - - if (!can_implicit) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(v_float_zero); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.depth_compare) { - // Depth sampling - UNIMPLEMENTED_IF(meta.bias); - const Id dref = AsFloat(Visit(meta.depth_compare)); - if (can_implicit) { - return { - OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } else { - return { - OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - } - - Id texture; - if (can_implicit) { - texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); - } else { - texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const Id lod = AsFloat(Visit(meta.lod)); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; - std::vector operands{lod}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - const Id dref = AsFloat(Visit(meta.depth_compare)); - return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id coords = GetCoordinates(operation, Type::Float); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id texture{}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare)), mask, operands); - } else { - u32 component_value = 0; - if (meta.component) { - const auto component = std::get_if(&*meta.component); - ASSERT_MSG(component, "Component is not an immediate value"); - component_value = component->GetValue(); - } - texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value), mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const auto image_id = GetTextureImage(operation); - if (meta.element == 3) { - return {OpImageQueryLevels(t_int, image_id), Type::Int}; - } - - const Id lod = AsUint(Visit(operation[0])); - const std::size_t coords_count = [&meta] { - switch (const auto type = meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return 1; - case Tegra::Shader::TextureType::Texture2D: - case Tegra::Shader::TextureType::TextureCube: - return 2; - case Tegra::Shader::TextureType::Texture3D: - return 3; - default: - UNREACHABLE_MSG("Invalid texture type={}", type); - return 2; - } - }(); - - if (meta.element >= coords_count) { - return {v_float_zero, Type::Float}; - } - - const std::array types = {t_int, t_int2, t_int3}; - const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); - const Id size = OpCompositeExtract(t_int, sizes, meta.element); - return {size, Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - if (meta.element >= 2) { - UNREACHABLE_MSG("Invalid element"); - return {v_float_zero, Type::Float}; - } - const auto sampler_id = GetTextureSampler(operation); - - const Id multiplier = Constant(t_float, 256.0f); - const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); - - const Id coords = GetCoordinates(operation, Type::Float); - Id size = OpImageQueryLod(t_float2, sampler_id, coords); - size = OpFMul(t_float2, size, multipliers); - size = OpConvertFToS(t_int2, size); - return GetTextureElement(operation, size, Type::Int); - } - - Expression TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const Id image = GetTextureImage(operation); - const Id coords = GetCoordinates(operation, Type::Int); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id fetch; - - if (meta.lod && !meta.sampler.is_buffer) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(AsInt(Visit(meta.lod))); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - fetch = OpImageFetch(t_float4, image, coords, mask, operands); - return GetTextureElement(operation, fetch, Type::Float); - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const auto [dx, dy] = GetDerivatives(operation); - const std::vector grad = {dx, dy}; - - static constexpr auto mask = spv::ImageOperandsMask::Grad; - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression ImageLoad(Operation operation) { - if (!device.IsFormatlessImageLoadSupported()) { - return {v_float_zero, Type::Float}; - } - - const auto& meta{std::get(operation.GetMeta())}; - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); - - return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto meta{std::get(operation.GetMeta())}; - std::vector colors; - for (const auto& value : meta.values) { - colors.push_back(AsUint(Visit(value))); - } - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpCompositeConstruct(t_uint4, colors); - - OpImageWrite(GetImage(operation), coords, texel, {}); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - const Id coordinate = GetCoordinates(operation, Type::Int); - const Id image = images.at(meta.image.index).image; - const Id sample = v_uint_zero; - const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(meta.values[0])); - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - Id pointer; - if (const auto smem = std::get_if(&*operation[0])) { - pointer = GetSharedMemoryPointer(*smem); - } else if (const auto gmem = std::get_if(&*operation[0])) { - pointer = GetGlobalMemoryPointer(*gmem); - } else { - UNREACHABLE(); - return {v_float_zero, Type::Float}; - } - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(operation[1])); - - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - Atomic(operation); - return {}; - } - - Expression Branch(Operation operation) { - const auto& target = std::get(*operation[0]); - OpStore(jmp_to, Constant(t_uint, target.GetValue())); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression BranchIndirect(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - - OpStore(jmp_to, op_a); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto& target = std::get(*operation[0]); - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, current); - - OpStore(access, Constant(t_uint, target.GetValue())); - OpStore(flow_stack_top, next); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, previous); - const Id target = OpLoad(t_uint, access); - - OpStore(flow_stack_top, previous); - OpStore(jmp_to, target); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { - using Compare = Maxwell::ComparisonOp; - switch (compare_op) { - case Compare::NeverOld: - return v_false; // Never let the test pass - case Compare::LessOld: - return OpFOrdLessThan(t_bool, operand_1, operand_2); - case Compare::EqualOld: - return OpFOrdEqual(t_bool, operand_1, operand_2); - case Compare::LessEqualOld: - return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); - case Compare::GreaterOld: - return OpFOrdGreaterThan(t_bool, operand_1, operand_2); - case Compare::NotEqualOld: - return OpFOrdNotEqual(t_bool, operand_1, operand_2); - case Compare::GreaterEqualOld: - return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); - default: - UNREACHABLE(); - return v_true; - } - } - - void AlphaTest(Id pointer) { - if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { - return; - } - const Id true_label = OpLabel(); - const Id discard_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = - MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); - - OpBranchConditional(condition, true_label, discard_label); - AddLabel(discard_label); - OpKill(); - AddLabel(true_label); - } - - void PreExit() { - if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { - const u32 position_index = out_indices.position.value(); - const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); - const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); - Id depth = OpLoad(t_float, z_pointer); - depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); - depth = OpFMul(t_float, depth, Constant(t_float, 0.5f)); - OpStore(z_pointer, depth); - } - if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [this](u32 reg) { - if (const auto it = registers.find(reg); it != registers.end()) { - return OpLoad(t_float, it->second); - } - return v_float_zero; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, - "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); - OpStore(pointer, SafeGetRegister(current_reg)); - if (rt == 0 && component == 3) { - AlphaTest(pointer); - } - ++current_reg; - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and - // current_reg already contains one past the last color register. - OpStore(frag_depth, SafeGetRegister(current_reg + 1)); - } - } - } - - Expression Exit(Operation operation) { - PreExit(); - inside_branch = true; - if (conditional_branch_set) { - OpReturn(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpReturn(); - AddLabel(); - } - return {}; - } - - Expression Discard(Operation operation) { - inside_branch = true; - if (conditional_branch_set) { - OpKill(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpKill(); - AddLabel(); - } - return {}; - } - - Expression EmitVertex(Operation) { - OpEmitVertex(); - return {}; - } - - Expression EndPrimitive(Operation operation) { - OpEndPrimitive(); - return {}; - } - - Expression InvocationId(Operation) { - return {OpLoad(t_int, invocation_id), Type::Int}; - } - - Expression YNegate(Operation) { - LOG_WARNING(Render_Vulkan, "(STUBBED)"); - return {Constant(t_float, 1.0f), Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - const Id id = OpLoad(t_uint3, local_invocation_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation operation) { - const Id id = OpLoad(t_uint3, workgroup_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const Id predicate = AsBool(Visit(operation[0])); - const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); - - if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { - // Guest-like devices can just return the first index. - return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; - } - - // The others will have to return what is local to the current thread. - // For instance a device with a warp size of 64 will return the upper uint when the current - // thread is 38. - const Id tid = OpLoad(t_uint, thread_id); - const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); - return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; - } - - template - Expression Vote(Operation operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id predicate = AsBool(Visit(operation[0])); - return {(this->*func)(t_bool, predicate), Type::Bool}; - } - - Expression ThreadId(Operation) { - return {OpLoad(t_uint, thread_id), Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id mask = thread_masks[index]; - return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - const Id value = AsFloat(Visit(operation[0])); - const Id index = AsUint(Visit(operation[1])); - return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); - return {}; - } - - const auto scope = spv::Scope::Workgroup; - const auto memory = spv::Scope::Workgroup; - const auto semantics = - spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; - OpControlBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(memory)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - template - Expression MemoryBarrier(Operation) { - const auto semantics = - spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | - spv::MemorySemanticsMask::WorkgroupMemory | - spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - - OpMemoryBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { - const Id id = OpVariable(type, storage); - Decorate(id, spv::Decoration::BuiltIn, static_cast(builtin)); - AddGlobalVariable(Name(id, std::move(name))); - interfaces.push_back(id); - return id; - } - - Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { - return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); - } - - template - Id AccessElement(Id pointer_type, Id composite, Args... elements_) { - std::vector members; - auto elements = {elements_...}; - for (const auto element : elements) { - members.push_back(Constant(t_uint, element)); - } - - return OpAccessChain(pointer_type, composite, members); - } - - Id As(Expression expr, Type wanted_type) { - switch (wanted_type) { - case Type::Bool: - return AsBool(expr); - case Type::Bool2: - return AsBool2(expr); - case Type::Float: - return AsFloat(expr); - case Type::Int: - return AsInt(expr); - case Type::Uint: - return AsUint(expr); - case Type::HalfFloat: - return AsHalfFloat(expr); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsBool(Expression expr) { - ASSERT(expr.type == Type::Bool); - return expr.id; - } - - Id AsBool2(Expression expr) { - ASSERT(expr.type == Type::Bool2); - return expr.id; - } - - Id AsFloat(Expression expr) { - switch (expr.type) { - case Type::Float: - return expr.id; - case Type::Int: - case Type::Uint: - return OpBitcast(t_float, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_float, expr.id); - } - return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsInt(Expression expr) { - switch (expr.type) { - case Type::Int: - return expr.id; - case Type::Float: - case Type::Uint: - return OpBitcast(t_int, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_int, expr.id); - } - return OpPackHalf2x16(t_int, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsUint(Expression expr) { - switch (expr.type) { - case Type::Uint: - return expr.id; - case Type::Float: - case Type::Int: - return OpBitcast(t_uint, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_uint, expr.id); - } - return OpPackHalf2x16(t_uint, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsHalfFloat(Expression expr) { - switch (expr.type) { - case Type::HalfFloat: - return expr.id; - case Type::Float: - case Type::Int: - case Type::Uint: - if (device.IsFloat16Supported()) { - return OpBitcast(t_half, expr.id); - } - return OpUnpackHalf2x16(t_half, AsUint(expr)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id GetHalfScalarFromFloat(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_scalar_half, value); - } - return value; - } - - Id GetFloatFromHalfScalar(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_float, value); - } - return value; - } - - AttributeType GetAttributeType(u32 location) const { - if (stage != ShaderType::Vertex) { - return {Type::Float, t_in_float, t_in_float4}; - } - switch (specialization.attribute_types.at(location)) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return {Type::Float, t_in_float, t_in_float4}; - case Maxwell::VertexAttribute::Type::SignedInt: - return {Type::Int, t_in_int, t_in_int4}; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return {Type::Uint, t_in_uint, t_in_uint4}; - default: - UNREACHABLE(); - return {Type::Float, t_in_float, t_in_float4}; - } - } - - Id GetTypeDefinition(Type type) const { - switch (type) { - case Type::Bool: - return t_bool; - case Type::Bool2: - return t_bool2; - case Type::Float: - return t_float; - case Type::Int: - return t_int; - case Type::Uint: - return t_uint; - case Type::HalfFloat: - return t_half; - default: - UNREACHABLE(); - return {}; - } - } - - std::array GetTypeVectorDefinitionLut(Type type) const { - switch (type) { - case Type::Float: - return {t_float, t_float2, t_float3, t_float4}; - case Type::Int: - return {t_int, t_int2, t_int3, t_int4}; - case Type::Uint: - return {t_uint, t_uint2, t_uint3, t_uint4}; - default: - UNIMPLEMENTED(); - return {}; - } - } - - std::tuple CreateFlowStack() { - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - constexpr auto storage_class = spv::StorageClass::Function; - - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); - const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, - ConstantNull(flow_stack_type)); - const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); - AddLocalVariable(stack); - AddLocalVariable(top); - return std::tie(stack, top); - } - - std::pair GetFlowStack(Operation operation) { - const auto stack_class = std::get(operation.GetMeta()); - switch (stack_class) { - case MetaStackClass::Ssy: - return {ssy_flow_stack, ssy_flow_stack_top}; - case MetaStackClass::Pbk: - return {pbk_flow_stack, pbk_flow_stack_top}; - } - UNREACHABLE(); - return {}; - } - - Id GetGlobalMemoryPointer(const GmemNode& gmem) { - const Id real = AsUint(Visit(gmem.GetRealAddress())); - const Id base = AsUint(Visit(gmem.GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - const Id buffer = global_buffers.at(gmem.GetDescriptor()); - return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); - } - - Id GetSharedMemoryPointer(const SmemNode& smem) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - return OpAccessChain(t_smem_uint, shared_memory, address); - } - - static constexpr std::array operation_decompilers = { - &SPIRVDecompiler::Assign, - - &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, - Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, - &SPIRVDecompiler::FCastHalf<0>, - &SPIRVDecompiler::FCastHalf<1>, - &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, - &SPIRVDecompiler::FSwizzleAdd, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, - - &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, - &SPIRVDecompiler::HNegate, - &SPIRVDecompiler::HClamp, - &SPIRVDecompiler::HCastFloat, - &SPIRVDecompiler::HUnpack, - &SPIRVDecompiler::HMergeF32, - &SPIRVDecompiler::HMergeHN<0>, - &SPIRVDecompiler::HMergeHN<1>, - &SPIRVDecompiler::HPack2, - - &SPIRVDecompiler::LogicalAssign, - &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, - &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, - Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::LogicalFOrdered, - &SPIRVDecompiler::LogicalFUnordered, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, - - &SPIRVDecompiler::LogicalAddCarry, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - // TODO(Rodrigo): Should these use the OpFUnord* variants? - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - - &SPIRVDecompiler::Texture, - &SPIRVDecompiler::TextureLod, - &SPIRVDecompiler::TextureGather, - &SPIRVDecompiler::TextureQueryDimensions, - &SPIRVDecompiler::TextureQueryLod, - &SPIRVDecompiler::TexelFetch, - &SPIRVDecompiler::TextureGradient, - - &SPIRVDecompiler::ImageLoad, - &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Branch, - &SPIRVDecompiler::BranchIndirect, - &SPIRVDecompiler::PushFlowStack, - &SPIRVDecompiler::PopFlowStack, - &SPIRVDecompiler::Exit, - &SPIRVDecompiler::Discard, - - &SPIRVDecompiler::EmitVertex, - &SPIRVDecompiler::EndPrimitive, - - &SPIRVDecompiler::InvocationId, - &SPIRVDecompiler::YNegate, - &SPIRVDecompiler::LocalInvocationId<0>, - &SPIRVDecompiler::LocalInvocationId<1>, - &SPIRVDecompiler::LocalInvocationId<2>, - &SPIRVDecompiler::WorkGroupId<0>, - &SPIRVDecompiler::WorkGroupId<1>, - &SPIRVDecompiler::WorkGroupId<2>, - - &SPIRVDecompiler::BallotThread, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, - - &SPIRVDecompiler::ThreadId, - &SPIRVDecompiler::ThreadMask<0>, // Eq - &SPIRVDecompiler::ThreadMask<1>, // Ge - &SPIRVDecompiler::ThreadMask<2>, // Gt - &SPIRVDecompiler::ThreadMask<3>, // Le - &SPIRVDecompiler::ThreadMask<4>, // Lt - &SPIRVDecompiler::ShuffleIndexed, - - &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrier, - &SPIRVDecompiler::MemoryBarrier, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - const Device& device; - const ShaderIR& ir; - const ShaderType stage; - const Tegra::Shader::Header header; - const Registry& registry; - const Specialization& specialization; - std::unordered_map transform_feedback; - - const Id t_void = Name(TypeVoid(), "void"); - - const Id t_bool = Name(TypeBool(), "bool"); - const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); - - const Id t_int = Name(TypeInt(32, true), "int"); - const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); - const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); - const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); - - const Id t_uint = Name(TypeInt(32, false), "uint"); - const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); - const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); - const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); - - const Id t_float = Name(TypeFloat(32), "float"); - const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); - const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); - const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); - - const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); - const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); - - const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); - - const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); - const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); - const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); - const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); - const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); - const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); - const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); - const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); - const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); - const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); - - const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); - - const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); - const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); - - const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_std140 = Decorate( - Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), - spv::Decoration::ArrayStride, 16U); - const Id t_cbuf_scalar = Decorate( - Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), - spv::Decoration::ArrayStride, 4U); - const Id t_cbuf_std140_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_scalar_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); - const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); - - Id t_smem_uint{}; - - const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); - const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); - const Id t_gmem_struct = MemberDecorate( - Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); - - const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); - - const Id v_float_zero = Constant(t_float, 0.0f); - const Id v_float_one = Constant(t_float, 1.0f); - const Id v_uint_zero = Constant(t_uint, 0); - - // Nvidia uses these defaults for varyings (e.g. position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - - const Id v_true = ConstantTrue(t_bool); - const Id v_false = ConstantFalse(t_bool); - - Id t_scalar_half{}; - Id t_half{}; - - Id out_vertex{}; - Id in_vertex{}; - std::map registers; - std::map custom_variables; - std::map predicates; - std::map flow_variables; - Id local_memory{}; - Id shared_memory{}; - std::array internal_flags{}; - std::map input_attributes; - std::unordered_map output_attributes; - std::map constant_buffers; - std::map global_buffers; - std::map uniform_texels; - std::map sampled_images; - std::map images; - - std::array frag_colors{}; - Id instance_index{}; - Id vertex_index{}; - Id base_instance{}; - Id base_vertex{}; - Id frag_depth{}; - Id frag_coord{}; - Id front_facing{}; - Id point_coord{}; - Id tess_level_outer{}; - Id tess_level_inner{}; - Id tess_coord{}; - Id invocation_id{}; - Id workgroup_id{}; - Id local_invocation_id{}; - Id thread_id{}; - std::array thread_masks{}; // eq, ge, gt, le, lt - - VertexIndices in_indices; - VertexIndices out_indices; - - std::vector interfaces; - - Id jmp_to{}; - Id ssy_flow_stack_top{}; - Id pbk_flow_stack_top{}; - Id ssy_flow_stack{}; - Id pbk_flow_stack{}; - Id continue_label{}; - std::map labels; - - bool conditional_branch_set{}; - bool inside_branch{}; -}; - -class ExprDecompiler { -public: - explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - Id operator()(const ExprAnd& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalAnd(type_def, op1, op2); - } - - Id operator()(const ExprOr& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalOr(type_def, op1, op2); - } - - Id operator()(const ExprNot& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - return decomp.OpLogicalNot(type_def, op1); - } - - Id operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); - } - - Id operator()(const ExprCondCode& expr) { - return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); - } - - Id operator()(const ExprVar& expr) { - return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); - } - - Id operator()(const ExprBoolean& expr) { - return expr.value ? decomp.v_true : decomp.v_false; - } - - Id operator()(const ExprGprEqual& expr) { - const Id target = decomp.Constant(decomp.t_uint, expr.value); - Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); - gpr = decomp.OpBitcast(decomp.t_uint, gpr); - return decomp.OpIEqual(decomp.t_bool, gpr, target); - } - - Id Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - SPIRVDecompiler& decomp; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(endif_label); - decomp.AddLabel(endif_label); - } - - void operator()([[maybe_unused]] const ASTIfElse& ast) { - UNREACHABLE(); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBasicBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpStore(decomp.flow_variables.at(ast.index), condition); - } - - void operator()([[maybe_unused]] const ASTLabel& ast) { - // Do nothing - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - const Id loop_label = decomp.OpLabel(); - const Id endloop_label = decomp.OpLabel(); - const Id loop_start_block = decomp.OpLabel(); - const Id loop_continue_block = decomp.OpLabel(); - current_loop_exit = endloop_label; - decomp.OpBranch(loop_label); - decomp.AddLabel(loop_label); - decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); - decomp.OpBranch(loop_start_block); - decomp.AddLabel(loop_start_block); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(loop_continue_block); - decomp.AddLabel(loop_continue_block); - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpBranchConditional(condition, loop_label, endloop_label); - decomp.AddLabel(endloop_label); - } - - void operator()(const ASTReturn& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(decomp.OpLabel()); - } - } - - void operator()(const ASTBreak& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(decomp.OpLabel()); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - SPIRVDecompiler& decomp; - Id current_loop_exit{}; -}; - -void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", i)); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - - DefinePrologue(); - - const ASTNode program = ir.GetASTProgram(); - ASTDecompiler decompiler{*this}; - decompiler.Visit(program); - - const Id next_block = OpLabel(); - OpBranch(next_block); - AddLabel(next_block); -} - -} // Anonymous namespace - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second, cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(GlobalBufferEntry{ - .cbuf_index = base.cbuf_index, - .cbuf_offset = base.cbuf_offset, - .is_written = usage.is_written, - }); - } - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - entries.uniform_texels.emplace_back(sampler); - } else { - entries.samplers.emplace_back(sampler); - } - } - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - entries.storage_texels.emplace_back(image); - } else { - entries.images.emplace_back(image); - } - } - for (const auto& attribute : ir.GetInputAttributes()) { - if (IsGenericAttribute(attribute)) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); - } - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - entries.uses_warps = ir.UsesWarps(); - return entries; -} - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const VideoCommon::Shader::Registry& registry, - const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace Vulkan { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using StorageTexelEntry = VideoCommon::Shader::ImageEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -constexpr u32 DESCRIPTOR_SET = 0; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) - : ConstBuffer{entry_}, index{index_} {} - - constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -struct GlobalBufferEntry { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_written{}; -}; - -struct ShaderEntries { - u32 NumBindings() const { - return static_cast(const_buffers.size() + global_buffers.size() + - uniform_texels.size() + samplers.size() + storage_texels.size() + - images.size()); - } - - std::vector const_buffers; - std::vector global_buffers; - std::vector uniform_texels; - std::vector samplers; - std::vector storage_texels; - std::vector images; - std::set attributes; - std::array clip_distances{}; - std::size_t shader_length{}; - u32 enabled_uniform_buffers{}; - bool uses_warps{}; -}; - -struct Specialization final { - u32 base_binding{}; - - // Compute specific - std::array workgroup_size{}; - u32 shared_memory_size{}; - - // Graphics specific - std::optional point_size; - std::bitset enabled_attributes; - std::array attribute_types{}; - bool ndc_minus_one_to_one{}; - bool early_fragment_tests{}; - float alpha_test_ref{}; - Maxwell::ComparisonOp alpha_test_func{}; -}; -// Old gcc versions don't consider this trivially copyable. -// static_assert(std::is_trivially_copyable_v); - -struct SPIRVShader { - std::vector code; - ShaderEntries entries; -}; - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, - const VideoCommon::Shader::Registry& registry, - const Specialization& specialization); - -} // namespace Vulkan -- cgit v1.2.3 From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: shader: Remove old shader management --- src/video_core/renderer_vulkan/blit_image.cpp | 1 - .../renderer_vulkan/vk_compute_pipeline.cpp | 136 +----- .../renderer_vulkan/vk_compute_pipeline.h | 47 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 484 --------------------- .../renderer_vulkan/vk_graphics_pipeline.h | 103 ----- .../renderer_vulkan/vk_pipeline_cache.cpp | 375 +--------------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 91 +--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 361 +-------------- src/video_core/renderer_vulkan/vk_rasterizer.h | 47 +- 9 files changed, 41 insertions(+), 1604 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -8,146 +8,14 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} +ComputePipeline::ComputePipeline() = default; -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? - for (u32 bindpoint = 0; bindpoint < static_cast(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; - } - - return device.GetLogical().CreateComputePipeline(ci); -} +ComputePipeline::~ComputePipeline() = default; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..433d8bb3d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,7 +6,6 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -15,50 +14,10 @@ class Device; class VKScheduler; class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector& code) const; - - vk::Pipeline CreatePipeline() const; - - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; - vk::Pipeline pipeline; + explicit ComputePipeline(); + ~ComputePipeline(); }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp deleted file mode 100644 index fc6dd83eb..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ /dev/null @@ -1,484 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -MICROPROFILE_DECLARE(Vulkan_PipelineCache); - -namespace { - -template -VkStencilOpState GetStencilFaceState(const StencilFace& face) { - return { - .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), - .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), - .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), - .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), - .compareMask = 0, - .writeMask = 0, - .reference = 0, - }; -} - -bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); -} - -VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union Swizzle { - u32 raw; - BitField<0, 3, Maxwell::ViewportSwizzle> x; - BitField<4, 3, Maxwell::ViewportSwizzle> y; - BitField<8, 3, Maxwell::ViewportSwizzle> z; - BitField<12, 3, Maxwell::ViewportSwizzle> w; - }; - const Swizzle unpacked{swizzle}; - - return { - .x = MaxwellToVK::ViewportSwizzle(unpacked.x), - .y = MaxwellToVK::ViewportSwizzle(unpacked.y), - .z = MaxwellToVK::ViewportSwizzle(unpacked.z), - .w = MaxwellToVK::ViewportSwizzle(unpacked.w), - }; -} - -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; - } -} - -} // Anonymous namespace - -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); -} - -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); -} - -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); - } - } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); -} - -std::vector VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; - - std::vector shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; - } - - device.SaveShader(stage->code); - - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); - } - return shader_modules; -} - -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector vertex_bindings; - std::vector vertex_binding_divisors; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = state.binding_divisors[index], - }); - } - } - - std::vector vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; - } - if (!input_attributes.contains(static_cast(index))) { - // Skip attributes not used by the vertex shaders. - continue; - } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); - } - - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), - }; - - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), - .pVertexBindingDivisors = vertex_binding_divisors.data(), - }; - if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &input_divisor_ci; - } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), - }; - - const VkPipelineTessellationStateCreateInfo tessellation_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; - - std::array swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewportSwizzles = swizzles.data(), - }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), - .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = static_cast( - dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), - .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisample_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthTestEnable = dynamic.depth_test_enable, - .depthWriteEnable = dynamic.depth_write_enable, - .depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, - .stencilTestEnable = dynamic.stencil_enable, - .front = GetStencilFaceState(dynamic.front), - .back = GetStencilFaceState(dynamic.back), - .minDepthBounds = 0.0f, - .maxDepthBounds = 0.0f, - }; - - std::array cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ - VK_COLOR_COMPONENT_R_BIT, - VK_COLOR_COMPONENT_G_BIT, - VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT, - }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } - } - - cb_attachments[index] = { - .blendEnable = blend.enable != 0, - .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), - .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), - .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), - .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), - .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), - .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; - } - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, - .pAttachments = cb_attachments.data(), - .blendConstants = {}, - }; - - std::vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended{ - VK_DYNAMIC_STATE_CULL_MODE_EXT, - VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, - VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_OP_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - } - - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - std::vector shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { - continue; - } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { - stage_ci.pNext = &subgroup_size_ci; - } - } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h deleted file mode 100644 index 8b6a98fe0..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class Device; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - -using SPIRVProgram = std::array, Maxwell::MaxShaderStage>; - -class VKGraphicsPipeline final { -public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span bindings) const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; - - std::vector CreateShaderModules(const SPIRVProgram& program) const; - - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; - - const Device& device; - VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector modules; - - vk::Pipeline pipeline; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..7d0ba1180 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,49 +19,27 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { - -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { +size_t StageFromProgram(size_t program) { return program == 0 ? 0 : program - 1; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(GetStageFromProgram(static_cast(program))); +ShaderType StageFromProgram(Maxwell::ShaderProgram program) { + return static_cast(StageFromProgram(static_cast(program))); } ShaderType GetShaderType(Maxwell::ShaderProgram program) { @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { return ShaderType::Vertex; } } - -template -void AddBindings(std::vector& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& container) { - const u32 num_entries = static_cast(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. - count = container[i].size; - } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); - } -} - -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings(bindings, binding, flags, entries.const_buffers); - AddBindings(bindings, binding, flags, entries.global_buffers); - AddBindings(bindings, binding, flags, entries.uniform_texels); - AddBindings(bindings, binding, flags, entries.samplers); - AddBindings(bindings, binding, flags, entries.storage_texels); - AddBindings(bindings, binding, flags, entries.images); - return binding; -} - } // Anonymous namespace -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); - return static_cast(hash); -} - -bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, Size()) == 0; -} - -std::size_t ComputePipelineCacheKey::Hash() const noexcept { +size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); + return static_cast(hash); } bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - entries(GenerateShaderEntries(shader_ir)) {} +Shader::Shader() = default; Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} -VKPipelineCache::~VKPipelineCache() = default; - -std::array VKPipelineCache::GetShaders() { - std::array shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); - - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } - } - shaders[index] = result; - } - return last_shaders = shaders; -} - -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; - } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - gpu.ShaderNotify().MarkShaderComplete(); - } - last_graphics_pipeline = entry.get(); - return last_graphics_pipeline; -} +PipelineCache::~PipelineCache() = default; -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader_info = std::make_unique(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); - - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } - } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; -} - -void VKPipelineCache::EmplacePipeline(std::unique_ptr pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); -} - -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. - if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; - continue; - } - Finish(); - it = graphics_cache.erase(it); - } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; - continue; - } - Finish(); - it = compute_cache.erase(it); - } -} - -std::pair> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector bindings; - - for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); - } - return {std::move(program), std::move(bindings)}; + throw "Bad"; } -template -void AddEntry(std::vector& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; - } - return; - } - - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. - // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - } - offset += count * entry_size; - binding += count; -} - -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries) { - AddEntry(template_entries, offset, binding, entries.const_buffers); - AddEntry(template_entries, offset, binding, entries.global_buffers); - AddEntry(template_entries, offset, binding, entries.uniform_texels); - AddEntry(template_entries, offset, binding, entries.samplers); - AddEntry(template_entries, offset, binding, entries.storage_texels); - AddEntry(template_entries, offset, binding, entries.images); -} +void PipelineCache::OnShaderRemoval(Shader*) {} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -15,15 +15,8 @@ #include #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -35,7 +28,7 @@ namespace Vulkan { class Device; class RasterizerVulkan; -class VKComputePipeline; +class ComputePipeline; class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { u32 shared_memory_size; std::array workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v); namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - template <> struct hash { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -83,66 +69,26 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); + explicit Shader(); ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; }; -class VKPipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; - - std::array GetShaders(); + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~PipelineCache() override; - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); - - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); - - void EmplacePipeline(std::unique_ptr pipeline); + ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: void OnShaderRemoval(Shader* shader) final; private: - std::pair> DecompileShaders( - const FixedPipelineState& fixed_state); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -158,17 +104,8 @@ private: std::array last_shaders{}; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; - std::mutex pipeline_cache; - std::unordered_map> - graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> compute_cache; }; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries); - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -std::array GetShaderAddresses( - const std::array& shaders) { - std::array addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { const Tegra::Texture::TextureHandle handle{data}; @@ -117,98 +107,6 @@ struct TextureHandle { u32 sampler; }; -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast(stage); - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra descriptor_pool, update_descriptor_queue), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - - SCOPE_EXIT({ gpu.TickWork(); }); - FlushWork(); - - query_cache.UpdateCounters(); - - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. - return; - } - - BeginTransformFeedback(); - - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - UpdateDynamicStates(); - - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); - } - }); - - EndTransformFeedback(); + UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); } void RasterizerVulkan::Clear() { @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); - - query_cache.UpdateCounters(); - - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); - - // Compute dispatches can't be executed inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); +void RasterizerVulkan::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,7 +28,6 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -103,19 +102,6 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -125,40 +111,12 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. - void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. - void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -198,13 +156,12 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector image_view_indices; std::array image_view_ids; -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 ++++++++++++++- .../renderer_vulkan/vk_compute_pipeline.h | 43 ++++- .../renderer_vulkan/vk_descriptor_pool.cpp | 6 +- .../renderer_vulkan/vk_descriptor_pool.h | 10 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 ++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 190 +++++++++++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 ++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 - .../renderer_vulkan/vk_resource_pool.cpp | 12 +- src/video_core/renderer_vulkan/vk_resource_pool.h | 12 +- 11 files changed, 428 insertions(+), 77 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7a3660496..588ce6139 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -4,6 +4,9 @@ #include +#include + +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -13,9 +16,142 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { + boost::container::small_vector bindings; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); +} + +vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( + const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) { + boost::container::small_vector entries; + size_t offset{}; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, + .pipelineLayout = pipeline_layout, + .set = 0, + }); +} +} // Anonymous namespace + +ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const Shader::Info& info_, vk::ShaderModule spv_module_) + : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)), + descriptor_set_layout(CreateDescriptorSetLayout(device, info)), + descriptor_allocator(descriptor_pool, *descriptor_set_layout), + pipeline_layout{device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + })}, + descriptor_update_template{ + CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, + pipeline{device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + })} {} + +void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { + u32 enabled_uniforms{}; + for (const auto& desc : info.constant_buffer_descriptors) { + enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; + } + buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); -ComputePipeline::ComputePipeline() = default; + buffer_cache.UnbindComputeStorageBuffers(); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); +} -ComputePipeline::~ComputePipeline() = default; +VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + return descriptor_set; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 433d8bb3d..dc045d524 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -5,19 +5,52 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKScheduler; -class VKUpdateDescriptorQueue; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..3bea1ff44 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..2501f9967 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout layout{}; std::vector descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..b06288403 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180..4bf3e4819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 0 : program - 1; -} +class Environment final : public Shader::Environment { +public: + explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + + ~Environment() override = default; + + [[nodiscard]] std::optional Analyze(u32 start_address) { + const std::optional size{TryFindSize(start_address)}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + } -ShaderType StageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(StageFromProgram(static_cast(program))); -} + [[nodiscard]] size_t ShaderSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; + } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; + [[nodiscard]] u128 ComputeHash() const { + const size_t size{ShaderSize()}; + auto data = std::make_unique(size); + gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash128(reinterpret_cast(data.get()), size); } -} + + u64 ReadInstruction(u32 address) override { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[address / INST_SIZE]; + } + return gpu_memory.Read(program_base + address); + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute.launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + static constexpr size_t INST_SIZE = sizeof(u64); + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + std::optional TryFindSize(u32 start_address) { + GPUVAddr guest_addr = program_base + start_address; + size_t offset = 0; + size_t size = BLOCK_SIZE; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { + const u64 inst = data[i / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + i; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; + } + + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + GPUVAddr program_base; + + u32 read_lowest = 0; + u32 read_highest = 0; + + std::vector code; + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader() = default; - -Shader::~Shader() = default; - PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} PipelineCache::~PipelineCache() = default; -ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + return nullptr; + } + ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + if (!shader) { + return CreateComputePipelineWithoutShader(*cpu_shader_addr); + } + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateComputePipeline(shader); + shader->compute_users.push_back(key); + return &pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + Environment env{kepler_compute, gpu_memory, program_base}; + if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { + // TODO: Load from cache } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - throw "Bad"; + const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + shader_info->unique_hash = env.ComputeHash(); + shader_info->size_bytes = env.ShaderSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + BuildShader(device, code)}; } -void PipelineCache::OnShaderRemoval(Shader*) {} +ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { + ShaderInfo shader; + ComputePipeline pipeline{CreateComputePipeline(&shader)}; + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; + shader.compute_users.push_back(key); + pipeline.AddRef(); + + const size_t size_bytes{shader.size_bytes}; + Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); + return &compute_cache.emplace(key, std::move(pipeline)).first->second; +} + +ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { + const auto& qmd{kepler_compute.launch_description}; + return { + .unique_hash = unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; +} + +void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { + for (const ComputePipelineCacheKey& key : shader->compute_users) { + const auto it = compute_cache.find(key); + ASSERT(it != compute_cache.end()); + + Pipeline& pipeline = it->second; + if (pipeline.RemoveRef()) { + // Wait for the pipeline to be free of GPU usage before destroying it + scheduler.Wait(pipeline.UsageTick()); + compute_cache.erase(it); + } + } +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e3e63340d..eb35abc27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u128 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -67,13 +67,13 @@ struct hash { namespace Vulkan { -class Shader { -public: - explicit Shader(); - ~Shader(); +struct ShaderInfo { + u128 unique_hash{}; + size_t size_bytes{}; + std::vector compute_users; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, @@ -83,12 +83,18 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~PipelineCache() override; - ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); protected: - void OnShaderRemoval(Shader* shader) final; + void OnShaderRemoval(ShaderInfo* shader) override; private: + ComputePipeline CreateComputePipeline(ShaderInfo* shader); + + ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + + ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -99,13 +105,7 @@ private: VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; - - std::mutex pipeline_cache; - std::unordered_map> compute_cache; + std::unordered_map compute_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f152297d9..b757454c4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,8 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{buffer_cache.mutex}; + update_descriptor_queue.Acquire(); + pipeline->ConfigureBufferCache(buffer_cache); + const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + + const auto& qmd{kepler_compute.launch_description}; + const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + const VkPipeline pipeline_handle{pipeline->Handle()}; + const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; + scheduler.Record( + [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, + descriptor_set, nullptr); + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 31017dc2b..3fd03b915 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -150,8 +149,6 @@ private: BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - GraphicsPipelineCacheKey graphics_key; - TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector ticks; ///< Ticks for each resource -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 8 ++++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..a444d55d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + /* + FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); + */ } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4bf3e4819..c2a41a360 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -31,8 +31,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); @@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { // TODO: Load from cache } const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-dis D:\\shader.spv"); + shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b757454c4..1b662f9f3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,8 +36,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 7 +------ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); -- cgit v1.2.3 From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: shader: Rename, implement FADD.SAT and P2R (imm) --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 49ff911d6..b25af6cd3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; - + /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); - + */ shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, -- cgit v1.2.3 From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 Subject: spirv: Fixes and Intel specific workarounds --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b25af6cd3..2497c2385 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -189,6 +189,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; /* -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- .../renderer_vulkan/vk_compute_pipeline.cpp | 101 +++++++++++++++++++++ .../renderer_vulkan/vk_compute_pipeline.h | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +- 4 files changed, 111 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a658a3276..ef8bef6ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh }); ++binding; } + for (const auto& desc : info.texture_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } return device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( ++binding; offset += sizeof(DescriptorUpdateEntry); } + for (const auto& desc : info.texture_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, .pNext = nullptr, @@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( .set = 0, }); } + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; + +VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); +} } // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, @@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.BindHostComputeBuffers(); } +void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, + TextureCache& texture_cache) { + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array image_view_ids; + boost::container::static_vector image_view_indices; + boost::container::static_vector sampler_handles; + + const auto& launch_desc{kepler_compute.launch_description}; + const auto& cbufs{launch_desc.const_buffer_config}; + const bool via_header_index{launch_desc.linked_tsc}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); + + const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; + const u32 raw_handle{gpu_memory.Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + } + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + size_t index{}; + for (const auto& desc : info.texture_descriptors) { + const VkSampler vk_sampler{sampler_handles[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); + ++index; + } +} + VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index dc045d524..08d73a2a4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,9 +6,11 @@ #include "common/common_types.h" #include "shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -30,6 +32,8 @@ public: ComputePipeline(const ComputePipeline&) = delete; void ConfigureBufferCache(BufferCache& buffer_cache); + void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2497c2385..bcb7dd2eb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,10 @@ public: return gpu_memory.Read(program_base + address); } + u32 TextureBoundBuffer() override { + return kepler_compute.regs.tex_cb_index; + } + std::array WorkgroupSize() override { const auto& qmd{kepler_compute.launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1b662f9f3..c94419d29 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex}; + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; update_descriptor_queue.Acquire(); pipeline->ConfigureBufferCache(buffer_cache); + pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; const auto& qmd{kepler_compute.launch_description}; -- cgit v1.2.3 From b9f7bf4472b8e0a5aad1aec3a5ff5bb56470bfff Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 14 Mar 2021 01:51:40 -0500 Subject: spirv: Add SignedZeroInfNanPreserve logic --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bcb7dd2eb..5477a2903 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -193,6 +193,10 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 + .../renderer_vulkan/fixed_pipeline_state.h | 9 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 24 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 162 ++++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 209 ++-------- .../renderer_vulkan/vk_compute_pipeline.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 445 +++++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 66 +++ src/video_core/renderer_vulkan/vk_pipeline.h | 36 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 346 ++++++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 82 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 47 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_render_pass_cache.cpp | 100 +++++ .../renderer_vulkan/vk_render_pass_cache.h | 53 +++ .../renderer_vulkan/vk_texture_cache.cpp | 68 +--- src/video_core/renderer_vulkan/vk_texture_cache.h | 29 +- 18 files changed, 1278 insertions(+), 409 deletions(-) create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d8f683907 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast(regs.zeta.format)); + std::ranges::transform(regs.rt, color_formats.begin(), + [](const auto& rt) { return static_cast(rt.format); }); alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..348f1d6ce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,7 +60,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array Mask() const noexcept { + std::array Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +97,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast(size.Value()); } }; @@ -187,7 +187,10 @@ struct FixedPipelineState { u32 raw2; BitField<0, 3, u32> alpha_test_func; BitField<3, 1, u32> early_z; + BitField<4, 1, u32> depth_enabled; + BitField<5, 5, u32> depth_format; }; + std::array color_formats; u32 alpha_test_ref; u32 point_size; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..dc4ff0da2 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..9f78e15b6 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..0a59aa659 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,162 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + [[likely]] if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +struct DescriptorLayoutTuple { + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; +}; + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutTuple Create(const vk::Device& device) { + DescriptorLayoutTuple result; + if (!bindings.empty()) { + result.descriptor_set_layout = device.CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + result.pipeline_layout = device.CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + if (!entries.empty()) { + result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *result.descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *result.pipeline_layout, + .set = 0, + }); + } + return result; + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); + } + } + +private: + void Add(VkDescriptorType type, VkShaderStageFlags stage) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + + boost::container::small_vector bindings; + boost::container::small_vector entries; + u32 binding{}; + size_t offset{}; +}; + +inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); + return {}; +} + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, + const ImageId* image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { + for (const auto& desc : info.texture_descriptors) { + const VkSampler sampler{samplers[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + ++index; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ff..6684d37a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,6 +6,7 @@ #include +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -17,140 +18,10 @@ namespace Vulkan { namespace { -vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { - boost::container::small_vector bindings; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for (const auto& desc : info.texture_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) { - boost::container::small_vector entries; - size_t offset{}; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for (const auto& desc : info.texture_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, - .pipelineLayout = pipeline_layout, - .set = 0, - }); -} - -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - } - UNREACHABLE_MSG("Invalid texture type {}", type); +DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { + DescriptorLayoutBuilder builder; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + return builder.Create(device.GetLogical()); } } // Anonymous namespace @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip VKUpdateDescriptorQueue& update_descriptor_queue_, const Shader::Info& info_, vk::ShaderModule spv_module_) : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, - spv_module(std::move(spv_module_)), - descriptor_set_layout(CreateDescriptorSetLayout(device, info)), - descriptor_allocator(descriptor_pool, *descriptor_set_layout), - pipeline_layout{device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - })}, - descriptor_update_template{ - CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, - pipeline{device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - })} {} + spv_module(std::move(spv_module_)) { + DescriptorLayoutTuple tuple{CreateLayout(device, info)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); +} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple static constexpr size_t max_elements = 64; std::array image_view_ids; boost::container::static_vector image_view_indices; - boost::container::static_vector sampler_handles; + boost::container::static_vector samplers; const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); + samplers.push_back(sampler->Handle()); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); size_t index{}; - for (const auto& desc : info.texture_descriptors) { - const VkSampler vk_sampler{sampler_handles[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; - update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); - ++index; - } + PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, + *update_descriptor_queue, index); } VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4..e82e5816b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -9,7 +9,6 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_pipeline.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,7 +17,7 @@ namespace Vulkan { class Device; -class ComputePipeline : public Pipeline { +class ComputePipeline { public: explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..a2ec418b1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,445 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include +#include + +#include "common/bit_field.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder.Create(device.GetLogical()); +} + +template +VkStencilOpState GetStencilFaceState(const StencilFace& face) { + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; +} + +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { + static constexpr std::array unsupported_topologies{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { + union Swizzle { + u32 raw; + BitField<0, 3, Maxwell::ViewportSwizzle> x; + BitField<4, 3, Maxwell::ViewportSwizzle> y; + BitField<8, 3, Maxwell::ViewportSwizzle> z; + BitField<12, 3, Maxwell::ViewportSwizzle> w; + }; + const Swizzle unpacked{swizzle}; + return VkViewportSwizzleNV{ + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; +} + +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; + } + return PixelFormatFromRenderTargetFormat(format); +} + +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + return key; +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache, + const FixedPipelineState& state, + std::array stages, + const std::array& infos) + : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, + buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, + update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, state, render_pass); +} + +void GraphicsPipeline::Configure(bool is_indexed) { + static constexpr size_t max_images_elements = 64; + std::array image_view_ids; + static_vector image_view_indices; + static_vector samplers; + + texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache->UpdateRenderTargets(false); + + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache->UnbindGraphicsStorageBuffers(stage); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); + ++index; + } + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(cbufs[cbuf_index].enabled); + const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; + const u32 raw_handle{gpu_memory->Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache->UpdateGraphicsBuffers(is_indexed); + texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + + buffer_cache->BindHostGeometryBuffers(is_indexed); + + size_t index{}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + buffer_cache->BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), + *texture_cache, *update_descriptor_queue, index); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + + scheduler->BindGraphicsPipeline(*pipeline); + scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + }); +} + +void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!device.IsExtExtendedDynamicStateSupported()) { + dynamic = state.dynamic_state; + } + static_vector vertex_bindings; + static_vector vertex_binding_divisors; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = state.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = state.binding_divisors[index], + }); + } + } + static_vector vertex_attributes; + const auto& input_attributes = stage_infos[0].loads_generics; + for (size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; + if (!attribute.enabled || !input_attributes[index]) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &input_divisor_ci; + } + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; + std::array swizzles; + std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; + if (device.IsNvViewportSwizzleSupported()) { + viewport_ci.pNext = &swizzle_ci; + } + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = static_cast( + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + static_vector cb_attachments; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + static constexpr std::array mask_table{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; + const auto format{static_cast(state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto& blend{state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? mask_table[i] : 0; + } + cb_attachments.push_back({ + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = write_mask, + }); + } + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast(cb_attachments.size()), + .pAttachments = cb_attachments.data(), + .blendConstants = {}, + }; + static_vector dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended{ + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + static_vector shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { + continue; + } + [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + */ + } + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..ba1d34a83 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + +public: + explicit GraphicsPipeline() = default; + explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, + TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, const FixedPipelineState& state, + std::array stages, + const std::array& infos); + + void Configure(bool is_indexed); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + +private: + void MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass); + + Tegra::Engines::Maxwell3D* maxwell3d{}; + Tegra::MemoryManager* gpu_memory{}; + TextureCache* texture_cache{}; + BufferCache* buffer_cache{}; + VKScheduler* scheduler{}; + VKUpdateDescriptorQueue* update_descriptor_queue{}; + + std::array spv_modules; + std::array stage_infos; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b06288403..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Pipeline { -public: - /// Add a reference count to the pipeline - void AddRef() noexcept { - ++ref_count; - } - - [[nodiscard]] bool RemoveRef() noexcept { - --ref_count; - return ref_count == 0; - } - - [[nodiscard]] u64 UsageTick() const noexcept { - return usage_tick; - } - -protected: - u64 usage_tick{}; - -private: - size_t ref_count{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903..c9da2080d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,8 +12,11 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" -#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/program_header.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -34,18 +37,18 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; - namespace { -class Environment final : public Shader::Environment { +using Shader::Backend::SPIRV::EmitSPIRV; + +class GenericEnvironment : public Shader::Environment { public: - explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} {} - ~Environment() override = default; + ~GenericEnvironment() override = default; - [[nodiscard]] std::optional Analyze(u32 start_address) { + std::optional Analyze(u32 start_address) { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -55,52 +58,47 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } - [[nodiscard]] size_t ShaderSize() const noexcept { + [[nodiscard]] size_t CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; + } + + [[nodiscard]] size_t ReadSize() const noexcept { return read_highest - read_lowest + INST_SIZE; } - [[nodiscard]] u128 ComputeHash() const { - const size_t size{ShaderSize()}; + [[nodiscard]] u128 CalculateHash() const { + const size_t size{ReadSize()}; auto data = std::make_unique(size); - gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); return Common::CityHash128(reinterpret_cast(data.get()), size); } - u64 ReadInstruction(u32 address) override { + u64 ReadInstruction(u32 address) final { read_lowest = std::min(read_lowest, address); read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } - return gpu_memory.Read(program_base + address); - } - - u32 TextureBoundBuffer() override { - return kepler_compute.regs.tex_cb_index; - } - - std::array WorkgroupSize() override { - const auto& qmd{kepler_compute.launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + return gpu_memory->Read(program_base + address); } -private: +protected: static constexpr size_t INST_SIZE = sizeof(u64); - static constexpr size_t BLOCK_SIZE = 0x1000; - static constexpr size_t MAXIMUM_SIZE = 0x100000; - static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + std::optional TryFindSize(GPUVAddr guest_addr) { + constexpr size_t BLOCK_SIZE = 0x1000; + constexpr size_t MAXIMUM_SIZE = 0x100000; + + constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - std::optional TryFindSize(u32 start_address) { - GPUVAddr guest_addr = program_base + start_address; size_t offset = 0; size_t size = BLOCK_SIZE; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; - gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { const u64 inst = data[i / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { @@ -114,17 +112,87 @@ private: return std::nullopt; } - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - GPUVAddr program_base; + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; - u32 read_lowest = 0; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; - std::vector code; u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; }; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, + GPUVAddr program_base_, u32 start_offset) + : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + } + } + + ~GraphicsEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return maxwell3d->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + throw Shader::LogicError("Requesting workgroup size in a graphics stage"); + } + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + stage = Shader::Stage::Compute; + } + + ~ComputeEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return kepler_compute->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute->launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); + return static_cast(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + const auto& float_control{device.FloatControlProperties()}; + profile = Shader::Profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel + }; +} PipelineCache::~PipelineCache() = default; +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (!RefreshStages()) { + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateGraphicsPipeline(); + return &pipeline; +} + ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return &pipeline; } pipeline = CreateComputePipeline(shader); - shader->compute_users.push_back(key); return &pipeline; } +bool PipelineCache::RefreshStages() { + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + graphics_key.unique_hashes[index] = u128{}; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 offset{shader_config.offset}; + shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + } + graphics_key.unique_hashes[index] = shader_info->unique_hash; + } + return true; +} + +const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr) { + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze(start_address)}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + flow_block_pool.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + std::array envs; + std::array programs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + + const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); + Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); + programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + } + std::array infos{}; + std::array modules; + + u32 binding{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + GraphicsEnvironment& env{envs[index]}; + Shader::IR::Program& program{programs[index]}; + + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + std::vector code{EmitSPIRV(profile, env, program, binding)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + + modules[stage_index] = BuildShader(device, code); + } + return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, + descriptor_pool, update_descriptor_queue, render_pass_cache, + graphics_key.state, std::move(modules), infos); +} + ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - Environment env{kepler_compute, gpu_memory, program_base}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto& float_control{device.FloatControlProperties()}; - const Shader::Profile profile{ - .unified_descriptor_binding = true, - .support_float_controls = true, - .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_separate_rounding_mode = - float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, - .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, - .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, - .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, - .support_fp16_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, - .support_fp32_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel - }; - const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; + Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + u32 binding{0}; + std::vector code{EmitSPIRV(profile, env, program, binding)}; /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); */ - shader_info->unique_hash = env.ComputeHash(); - shader_info->size_bytes = env.ShaderSize(); - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + shader_info->unique_hash = env.CalculateHash(); + shader_info->size_bytes = env.ReadSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } @@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_ ShaderInfo shader; ComputePipeline pipeline{CreateComputePipeline(&shader)}; const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - shader.compute_users.push_back(key); - pipeline.AddRef(); - const size_t size_bytes{shader.size_bytes}; Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); return &compute_cache.emplace(key, std::move(pipeline)).first->second; @@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) }; } -void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { - for (const ComputePipelineCacheKey& key : shader->compute_users) { - const auto it = compute_cache.find(key); - ASSERT(it != compute_cache.end()); - - Pipeline& pipeline = it->second; - if (pipeline.RemoveRef()) { - // Wait for the pipeline to be free of GPU usage before destroying it - scheduler.Wait(pipeline.UsageTick()); - compute_cache.erase(it); - } - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eb35abc27..60fb976df 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -12,11 +12,18 @@ #include #include -#include - #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,13 +33,6 @@ class System; namespace Vulkan { -class Device; -class RasterizerVulkan; -class ComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { @@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + } // namespace Vulkan namespace std { @@ -63,14 +83,28 @@ struct hash { } }; +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + } // namespace std namespace Vulkan { +class ComputePipeline; +class Device; +class RasterizerVulkan; +class RenderPassCache; +class VKDescriptorPool; +class VKScheduler; +class VKUpdateDescriptorQueue; + struct ShaderInfo { u128 unique_hash{}; size_t size_bytes{}; - std::vector compute_users; }; class PipelineCache final : public VideoCommon::ShaderCache { @@ -80,15 +114,23 @@ public: Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache); ~PipelineCache() override; - [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); -protected: - void OnShaderRemoval(ShaderInfo* shader) override; + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + bool RefreshStages(); + + const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr); + + GraphicsPipeline CreateGraphicsPipeline(); + ComputePipeline CreateComputePipeline(ShaderInfo* shader); ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); @@ -104,8 +146,20 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + + GraphicsPipelineCacheKey graphics_key{}; std::unordered_map compute_cache; + std::unordered_map graphics_cache; + + Shader::ObjectPool inst_pool; + Shader::ObjectPool block_pool; + Shader::ObjectPool flow_block_pool; + + Shader::Profile profile; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c94419d29..036b531b9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + texture_cache), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } + update_descriptor_queue.Acquire(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); + + BeginTransformFeedback(); + + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + UpdateDynamicStates(); + + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, + draw_params.base_vertex, draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. scheduler.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b915..88dbd753b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -148,6 +149,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..7e5ae43ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector descriptions; + u32 num_images{0}; + + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + if (format == PixelFormat::Invalid) { + continue; + } + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + ++num_images; + } + const size_t num_colors{descriptions.size()}; + const VkAttachmentReference* depth_attachment{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast(num_colors), + .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..db8e83f1a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast(key.depth_format) << 48; + value ^= static_cast(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + + class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..1bbc542a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array& color) { if (color == std::array{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector descriptions; std::vector attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..189ee5a68 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -26,35 +26,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast(key.depth_format) << 48; - value ^= static_cast(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,7 +37,7 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 Subject: shader: Implement I2F --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c9da2080d..d1399a46d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, .support_float_controls = true, @@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From 76c8a962ac4eae77e71d66a72c448930240339f9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 19:11:56 -0300 Subject: spirv: Implement VertexId and InstanceId, refactor code --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d1399a46d..90e1a30f6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -230,6 +230,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, + .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, -- cgit v1.2.3 From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 21 Mar 2021 19:28:37 -0400 Subject: shader: Implement DMNMX, DSET, DSETP --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 90e1a30f6..75f7c1e61 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -244,6 +244,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .support_fp64_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From c63cf4fa2e22538a01c191e1f97ac0f93b67e804 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: vk_pipeline_cache: Add pipeline cache --- .../renderer_vulkan/vk_pipeline_cache.cpp | 391 ++++++++++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 34 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 1 + .../renderer_vulkan/vk_render_pass_cache.h | 4 +- 4 files changed, 332 insertions(+), 98 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 75f7c1e61..41fc9588f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,12 +4,15 @@ #include #include +#include #include #include #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/file_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/memory.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -37,18 +40,23 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -namespace { -using Shader::Backend::SPIRV::EmitSPIRV; +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; + } ~GenericEnvironment() override = default; - std::optional Analyze(u32 start_address) { + std::optional Analyze() { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -66,11 +74,15 @@ public: return read_highest - read_lowest + INST_SIZE; } + [[nodiscard]] bool CanBeSerialized() const noexcept { + return has_unbound_instructions; + } + [[nodiscard]] u128 CalculateHash() const { const size_t size{ReadSize()}; - auto data = std::make_unique(size); + const auto data{std::make_unique(size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(reinterpret_cast(data.get()), size); + return Common::CityHash128(data.get(), size); } u64 ReadInstruction(u32 address) final { @@ -80,9 +92,32 @@ public: if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } + has_unbound_instructions = true; return gpu_memory->Read(program_base + address); } + void Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(ReadSize())}; + const auto data{std::make_unique(code_size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + + const u32 texture_bound{TextureBoundBuffer()}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(data.get(), code_size); + if (stage == Shader::Stage::Compute) { + const std::array workgroup_size{WorkgroupSize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } + } + protected: static constexpr size_t INST_SIZE = sizeof(u64); @@ -122,16 +157,22 @@ protected: u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; + + bool has_unbound_instructions = false; }; +namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_offset) - : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + GPUVAddr program_base_, u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -158,11 +199,11 @@ public: ~GraphicsEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -174,18 +215,20 @@ class ComputeEnvironment final : public GenericEnvironment { public: explicit ComputeEnvironment() = default; explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { stage = Shader::Stage::Compute; } ~ComputeEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } @@ -193,8 +236,174 @@ public: private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; + +void SerializePipeline(std::span key, std::span envs, + std::ofstream& file) { + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); +} + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { + try { + std::ofstream file; + file.exceptions(std::ifstream::failbit); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); + return; + } + if (file.tellp() == 0) { + // Write header... + } + const std::span key_span(reinterpret_cast(&key), sizeof(key)); + SerializePipeline(key_span, MakeSpan(envs), file); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::Delete(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); + } + } +} + +class FileEnvironment final : public Shader::Environment { +public: + void Deserialize(std::ifstream& file) { + u64 code_size{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } + } + + u64 ReadInstruction(u32 address) override { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; + } + + u32 TextureBoundBuffer() const override { + return texture_bound; + } + + std::array WorkgroupSize() const override { + return workgroup_size; + } + +private: + std::unique_ptr code; + std::array workgroup_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; } // Anonymous namespace +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; + std::string base_dir{shader_dir + "/vulkan"}; + std::string transferable_dir{base_dir + "/transferable"}; + std::string precompiled_dir{base_dir + "/precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); + + Common::ThreadWorker worker(11, "PipelineBuilder"); + std::mutex cache_mutex; + struct { + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + std::ifstream file; + Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + // Read header... + + while (file.tellg() != end) { + if (stop_loading) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + auto envs{std::make_shared>(num_envs)}; + for (FileEnvironment& env : *envs) { + env.Deserialize(file); + } + if (envs->front().ShaderStage() == Shader::Stage::Compute) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + + std::lock_guard lock{cache_mutex}; + compute_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } else { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : *envs) { + env_ptrs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + + std::lock_guard lock{cache_mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } + ++state.total; + } + { + std::lock_guard lock{cache_mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + } + worker.WaitForRequests(); +} + size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -279,17 +488,22 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { if (!cpu_shader_addr) { return nullptr; } - ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; if (!shader) { - return CreateComputePipelineWithoutShader(*cpu_shader_addr); + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + shader = MakeShaderInfo(env, *cpu_shader_addr); } - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { return &pipeline; } - pipeline = CreateComputePipeline(shader); + pipeline = CreateComputePipeline(key, shader); return &pipeline; } @@ -310,26 +524,25 @@ bool PipelineCache::RefreshStages() { } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; if (!shader_info) { - const u32 offset{shader_config.offset}; - shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); } graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; } -const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr) { - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; +const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze(start_address)}) { + if (const std::optional cached_hash{env.Analyze()}) { info->unique_hash = *cached_hash; info->size_bytes = env.CachedSize(); } else { // Slow path, not really hit on commercial games // Build a control flow graph to get the real shader size - flow_block_pool.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + main_pools.flow_block.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; info->unique_hash = env.CalculateHash(); info->size_bytes = env.ReadSize(); } @@ -339,100 +552,100 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); - - std::array envs; +GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, + const GraphicsPipelineCacheKey& key, + std::span envs) { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; std::array programs; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } - const auto program{static_cast(index)}; - GraphicsEnvironment& env{envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); - Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); - programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } std::array infos{}; std::array modules; u32 binding{0}; + env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } UNIMPLEMENTED_IF(index == 0); - GraphicsEnvironment& env{envs[index]}; Shader::IR::Program& program{programs[index]}; - const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - std::vector code{EmitSPIRV(profile, env, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + Shader::Environment& env{*envs[env_index]}; + ++env_index; + const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, - graphics_key.state, std::move(modules), infos); + descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, + std::move(modules), infos); } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + main_pools.ReleaseContents(); + + std::array graphics_envs; + boost::container::static_vector generic_envs; + boost::container::static_vector envs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{graphics_envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + generic_envs.push_back(&env); + envs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); + } + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; - if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { - // TODO: Load from cache + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + main_pools.ReleaseContents(); + ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); } - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env) const { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; - Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(profile, env, program, binding)}; - /* - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-dis D:\\shader.spv"); - */ - shader_info->unique_hash = env.CalculateHash(); - shader_info->size_bytes = env.ReadSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { - ShaderInfo shader; - ComputePipeline pipeline{CreateComputePipeline(&shader)}; - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - const size_t size_bytes{shader.size_bytes}; - Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); - return &compute_cache.emplace(key, std::move(pipeline)).first->second; -} - -ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { - const auto& qmd{kepler_compute.launch_description}; - return { - .unique_hash = unique_hash, - .shared_memory_size = qmd.shared_alloc, - .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, - }; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 60fb976df..2ecb68bdc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,7 @@ namespace Vulkan { class ComputePipeline; class Device; +class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKDescriptorPool; @@ -107,6 +109,18 @@ struct ShaderInfo { size_t size_bytes{}; }; +struct ShaderPools { + void ReleaseContents() { + inst.ReleaseContents(); + block.ReleaseContents(); + flow_block.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, @@ -123,19 +137,24 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + private: bool RefreshStages(); - const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr); + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); GraphicsPipeline CreateGraphicsPipeline(); - ComputePipeline CreateComputePipeline(ShaderInfo* shader); + GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs); - ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, + Shader::Environment& env) const; Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -155,11 +174,10 @@ private: std::unordered_map compute_cache; std::unordered_map graphics_cache; - Shader::ObjectPool inst_pool; - Shader::ObjectPool block_pool; - Shader::ObjectPool flow_block_pool; + ShaderPools main_pools; Shader::Profile profile; + std::string pipeline_cache_filename; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 7e5ae43ea..1c6ba7289 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -50,6 +50,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; const auto [pair, is_new] = cache.try_emplace(key); if (!is_new) { return *pair->second; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index db8e83f1a..eaa0ed775 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "video_core/surface.h" @@ -37,7 +38,7 @@ struct hash { namespace Vulkan { - class Device; +class Device; class RenderPassCache { public: @@ -48,6 +49,7 @@ public: private: const Device* device{}; std::unordered_map cache; + std::mutex mutex; }; } // namespace Vulkan -- cgit v1.2.3 From f8115a6a9e544c3cc33f32ea821d0df15e01591c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: vk_pipeline_cache: Add pipeline cache --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 036b531b9..8f63a7591 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -514,6 +514,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } +void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 88dbd753b..2f1551e65 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -101,6 +101,8 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; private: static constexpr size_t MAX_TEXTURES = 192; -- cgit v1.2.3 From d40faa1db0f1703edc4e8f279f1556cee4ebddad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 16:12:04 -0300 Subject: vk_pipeline_cache: Fix ReleaseContents order --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 2ecb68bdc..d481f56f9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -111,9 +111,9 @@ struct ShaderInfo { struct ShaderPools { void ReleaseContents() { - inst.ReleaseContents(); - block.ReleaseContents(); flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } Shader::ObjectPool inst; -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 7 ++++++- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6684d37a6..8e544d745 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 41fc9588f..bdbc8dd1e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_vote = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:33:45 -0300 Subject: shader: Implement NDC [-1, 1], attribute types and default varying initialization --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 33 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++- 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a2ec418b1..a87ed1976 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -181,6 +181,9 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bdbc8dd1e..504b8c9d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -437,7 +437,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - profile = Shader::Profile{ + base_profile = Shader::Profile{ .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -458,6 +458,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .generic_input_types{}, }; } @@ -589,6 +590,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, Shader::Environment& env{*envs[env_index]}; ++env_index; + const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } @@ -645,9 +647,36 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, env, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, + Shader::Stage stage) { + Shader::Profile profile{base_profile}; + if (stage == Shader::Stage::VertexB) { + profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), + &CastAttributeType); + } + return profile; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index d481f56f9..e09d78063 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -156,6 +156,8 @@ private: ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env) const; + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -176,7 +178,7 @@ private: ShaderPools main_pools; - Shader::Profile profile; + Shader::Profile base_profile; std::string pipeline_cache_filename; }; -- cgit v1.2.3 From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: shader: Implement TXQ and fix FragDepth --- .../renderer_vulkan/vk_pipeline_cache.cpp | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 504b8c9d6..30d424346 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -25,6 +25,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -45,6 +46,10 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } +u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | static_cast(offset); +} + class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; @@ -101,15 +106,21 @@ public: const auto data{std::make_unique(code_size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + const u64 num_texture_types{static_cast(texture_types.size())}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); @@ -147,10 +158,47 @@ protected: return std::nullopt; } + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, + u32 cbuf_offset) { + const u32 raw{cbuf_offset < cbuf_size ? gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + + const Shader::TextureType result{[&] { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + throw Shader::NotImplementedException("Texture buffer"); + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } + }()}; + texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + return result; + } + Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; std::vector code; + std::unordered_map texture_types; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -176,29 +224,45 @@ public: switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; + stage_index = 0; break; case Maxwell::ShaderProgram::VertexB: stage = Shader::Stage::VertexB; + stage_index = 0; break; case Maxwell::ShaderProgram::TesselationControl: stage = Shader::Stage::TessellationControl; + stage_index = 1; break; case Maxwell::ShaderProgram::TesselationEval: stage = Shader::Stage::TessellationEval; + stage_index = 2; break; case Maxwell::ShaderProgram::Geometry: stage = Shader::Stage::Geometry; + stage_index = 3; break; case Maxwell::ShaderProgram::Fragment: stage = Shader::Stage::Fragment; + stage_index = 4; break; default: UNREACHABLE_MSG("Invalid program={}", program); + break; } } ~GraphicsEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{maxwell3d->regs}; + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, + cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } @@ -209,6 +273,7 @@ public: private: Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; }; class ComputeEnvironment final : public GenericEnvironment { @@ -224,6 +289,15 @@ public: ~ComputeEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, + cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } @@ -278,7 +352,9 @@ class FileEnvironment final : public Shader::Environment { public: void Deserialize(std::ifstream& file) { u64 code_size{}; + u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -286,6 +362,13 @@ public: .read(reinterpret_cast(&stage), sizeof(stage)); code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u64 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { @@ -300,6 +383,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -310,6 +401,7 @@ public: private: std::unique_ptr code; + std::unordered_map texture_types; std::array workgroup_size{}; u32 texture_bound{}; u32 read_lowest{}; -- cgit v1.2.3 From ec005be99d4f231f6d4d812841c84ab4af4204a6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:55:07 -0300 Subject: shader: Fix rasterizer integration order issues --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 7 +++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 5 +---- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a87ed1976..82536b9d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -139,7 +139,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector samplers; texture_cache->SynchronizeGraphicsDescriptors(); - texture_cache->UpdateRenderTargets(false); const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; @@ -181,13 +180,17 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + texture_cache->UpdateRenderTargets(false); + scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); + + scheduler->BindGraphicsPipeline(*pipeline); + if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - scheduler->BindGraphicsPipeline(*pipeline); scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8f63a7591..d7d9927dd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -178,7 +178,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); UpdateDynamicStates(); const auto& regs{maxwell3d.regs}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 1c6ba7289..b2dcd74ab 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -56,15 +56,12 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; - u32 num_images{0}; - for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; if (format == PixelFormat::Invalid) { continue; } descriptions.push_back(AttachmentDescription(*device, format, key.samples)); - ++num_images; } const size_t num_colors{descriptions.size()}; const VkAttachmentReference* depth_attachment{}; @@ -89,7 +86,7 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pNext = nullptr, .flags = 0, .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), + .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), .subpassCount = 1, .pSubpasses = &subpass, .dependencyCount = 0, -- cgit v1.2.3 From cb6039ccea77d35fb829c337fd61451f549e3453 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:56:09 -0300 Subject: vk_pipeline_cache: Fix pipeline and shader caches --- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 +++++++++++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30d424346..51c155077 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -62,7 +62,7 @@ public: ~GenericEnvironment() override = default; std::optional Analyze() { - const std::optional size{TryFindSize(start_address)}; + const std::optional size{TryFindSize()}; if (!size) { return std::nullopt; } @@ -71,6 +71,13 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } + void SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); + } + [[nodiscard]] size_t CachedSize() const noexcept { return cached_highest - cached_lowest + INST_SIZE; } @@ -80,7 +87,7 @@ public: } [[nodiscard]] bool CanBeSerialized() const noexcept { - return has_unbound_instructions; + return !has_unbound_instructions; } [[nodiscard]] u128 CalculateHash() const { @@ -95,7 +102,7 @@ public: read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { - return code[address / INST_SIZE]; + return code[(address - cached_lowest) / INST_SIZE]; } has_unbound_instructions = true; return gpu_memory->Read(program_base + address); @@ -117,30 +124,34 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } + file.flush(); } protected: static constexpr size_t INST_SIZE = sizeof(u64); - std::optional TryFindSize(GPUVAddr guest_addr) { + std::optional TryFindSize() { constexpr size_t BLOCK_SIZE = 0x1000; constexpr size_t MAXIMUM_SIZE = 0x100000; constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - size_t offset = 0; - size_t size = BLOCK_SIZE; + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; @@ -623,6 +634,7 @@ bool PipelineCache::RefreshStages() { GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; shader_info = MakeShaderInfo(env, *cpu_shader_addr); } + shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; @@ -707,6 +719,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { GraphicsEnvironment& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + generic_envs.push_back(&env); envs.push_back(&env); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e09d78063..b55e14189 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -172,6 +172,7 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + std::array shader_infos{}; std::unordered_map compute_cache; std::unordered_map graphics_cache; -- cgit v1.2.3 From 675a82416d7775dc7a252a5d8f5b704e6b8f2326 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 03:08:31 -0300 Subject: spirv: Remove dependencies on Environment when generating SPIR-V --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 51c155077..251559b16 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -680,7 +680,6 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, std::array modules; u32 binding{0}; - env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == u128{}) { continue; @@ -691,11 +690,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - Shader::Environment& env{*envs[env_index]}; - ++env_index; - - const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; - const std::vector code{EmitSPIRV(profile, env, program, binding)}; + const Shader::Profile profile{MakeProfile(key, program.stage)}; + const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, @@ -753,7 +749,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -- cgit v1.2.3 From dbd882ddeb1a1a9233c0085d0b8ccb022db385b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: shader: Better interpolation and disabled attributes support --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 82536b9d6..278509bf0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,10 +221,10 @@ void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineSta } } static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].loads_generics; + const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < state.attributes.size(); ++index) { const auto& attribute = state.attributes[index]; - if (!attribute.enabled || !input_attributes[index]) { + if (!attribute.enabled || !input_attributes[index].used) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 251559b16..69dd945b2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -755,6 +755,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } switch (attr.Type()) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- .../renderer_vulkan/vk_pipeline_cache.cpp | 47 ++++++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 69dd945b2..0d6a32bfd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,10 +114,12 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -132,7 +134,10 @@ public: file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + const u32 shared_memory_size{SharedMemorySize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), + sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } @@ -278,6 +283,16 @@ public: return maxwell3d->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const u64 size{sph.LocalMemorySize()}; + ASSERT(size <= std::numeric_limits::max()); + return static_cast(size); + } + + u32 SharedMemorySize() const override { + throw Shader::LogicError("Requesting shared memory size in graphics stage"); + } + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -313,6 +328,16 @@ public: return kepler_compute->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.local_pos_alloc; + } + + u32 SharedMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.shared_alloc; + } + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; @@ -366,6 +391,7 @@ public: u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -381,7 +407,8 @@ public: texture_types.emplace(key, type); } if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); } @@ -402,6 +429,14 @@ public: return it->second; } + u32 LocalMemorySize() const override { + return local_memory_size; + } + + u32 SharedMemorySize() const override { + return shared_memory_size; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -414,6 +449,8 @@ private: std::unique_ptr code; std::unordered_map texture_types; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; @@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ + .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, @@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { shader = MakeShaderInfo(env, *cpu_shader_addr); } const ComputePipelineCacheKey key{ - .unique_hash = shader->unique_hash, - .shared_memory_size = qmd.shared_alloc, + .unique_hash{shader->unique_hash}, + .shared_memory_size{qmd.shared_alloc}, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; -- cgit v1.2.3 From 3c758d9b538e957a20ea6db136741ad2bd16406d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 21:55:47 -0300 Subject: vk_pipeline_cache: Fix size hashing of shaders --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0d6a32bfd..8b2816c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -68,7 +68,7 @@ public: } cached_lowest = start_address; cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + return Common::CityHash128(reinterpret_cast(code.data()), *size); } void SetCachedSize(size_t size_bytes) { @@ -126,12 +126,10 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); - file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } - file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -141,7 +139,6 @@ public: } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } - file.flush(); } protected: @@ -161,10 +158,10 @@ protected: code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { - const u64 inst = data[i / INST_SIZE]; + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + i; + return offset + index; } } guest_addr += BLOCK_SIZE; @@ -751,7 +748,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { continue; } const auto program{static_cast(index)}; - GraphicsEnvironment& env{graphics_envs[index]}; + auto& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); @@ -771,6 +768,8 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + main_pools.ReleaseContents(); ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; if (!pipeline_cache_filename.empty()) { -- cgit v1.2.3 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8b2816c13..6cde01491 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { } u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | static_cast(offset); + return (static_cast(index) << 32) | offset; } class GenericEnvironment : public Shader::Environment { @@ -114,11 +114,13 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) @@ -130,6 +132,10 @@ public: file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -212,6 +218,7 @@ protected: std::vector code; std::unordered_map texture_types; + std::unordered_map cbuf_values; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -267,6 +274,17 @@ public: ~GraphicsEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{maxwell3d->regs}; const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; @@ -312,6 +330,18 @@ public: ~ComputeEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; @@ -386,8 +416,10 @@ public: void Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_cbuf_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) @@ -403,6 +435,13 @@ public: .read(reinterpret_cast(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -418,6 +457,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; if (it == texture_types.end()) { @@ -445,6 +492,7 @@ public: private: std::unique_ptr code; std::unordered_map texture_types; + std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; u32 shared_memory_size{}; -- cgit v1.2.3 From 7a1c14269e20cffeed780f388c90a86f8bba1a92 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 03:58:46 -0300 Subject: spirv: Add fixed pipeline point size --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6cde01491..eb4df9000 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -864,6 +864,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; if (stage == Shader::Stage::VertexB) { profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = Common::BitCast(key.state.point_size); + } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); } -- cgit v1.2.3 From 0c933e20dec02e12a4644281b9b7bf9716a5cbb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 21:28:00 -0300 Subject: vk_pipeline_cache: Name SPIR-V modules --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb4df9000..30a707599 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -777,6 +777,11 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], + key.unique_hashes[index][1])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); + } } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, @@ -836,8 +841,13 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(base_profile, program, binding)}; + vk::ShaderModule spv_module{BuildShader(device, code)}; + if (device.HasDebuggingToolAttached()) { + const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + spv_module.SetObjectNameEXT(name.c_str()); + } return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - BuildShader(device, code)}; + std::move(spv_module)}; } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { -- cgit v1.2.3 From 2fc698b040e7e25223ba6ebe31abb04b1fc65f06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 01:36:22 -0300 Subject: vulkan: Build pipelines in parallel at runtime Wait from the worker thread for a pipeline to build before binding it to the command buffer. This allows queueing pipelines to multiple threads. --- .../renderer_vulkan/vk_compute_pipeline.cpp | 95 +++++++++++++--------- .../renderer_vulkan/vk_compute_pipeline.h | 30 +++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 72 +++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 31 +++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 78 +++++++++--------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 24 ++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 15 +--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 10 +-- src/video_core/renderer_vulkan/vk_scheduler.h | 7 +- 9 files changed, 197 insertions(+), 165 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e544d745..1c3249e3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -27,8 +27,9 @@ DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& inf ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + Common::ThreadWorker* thread_worker, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { DescriptorLayoutTuple tuple{CreateLayout(device, info)}; descriptor_set_layout = std::move(tuple.descriptor_set_layout); @@ -36,46 +37,55 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + auto func{[this, &device] { + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } } -void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); - ++index; + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); -} -void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, - TextureCache& texture_cache) { texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; @@ -103,15 +113,26 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t index{}; + size_t image_index{}; PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - *update_descriptor_queue, index); -} + update_descriptor_queue, image_index); -VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + if (!building_flag.test()) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + }); + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - return descriptor_set; + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index e82e5816b..02da504f7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,10 @@ #pragma once +#include + #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" @@ -16,36 +19,26 @@ namespace Vulkan { class Device; +class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - const Shader::Info& info, vk::ShaderModule spv_module); + Common::ThreadWorker* thread_worker, const Shader::Info& info, + vk::ShaderModule spv_module); - ComputePipeline& operator=(ComputePipeline&&) noexcept = default; - ComputePipeline(ComputePipeline&&) noexcept = default; + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; + ComputePipeline(ComputePipeline&&) noexcept = delete; ComputePipeline& operator=(const ComputePipeline&) = delete; ComputePipeline(const ComputePipeline&) = delete; - void ConfigureBufferCache(BufferCache& buffer_cache); - void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); - - [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); - - [[nodiscard]] VkPipeline Handle() const noexcept { - return *pipeline; - } - - [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { - return *pipeline_layout; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - VKUpdateDescriptorQueue* update_descriptor_queue; + VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; vk::ShaderModule spv_module; @@ -54,6 +47,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 278509bf0..ddc08b8c4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -112,13 +112,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, BufferCache& buffer_cache_, TextureCache& texture_cache_, const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state, + const FixedPipelineState& state_, std::array stages, const std::array& infos) - : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, - buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, - update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ + std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -128,8 +130,17 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; - MakePipeline(device, state, render_pass); + auto func{[this, &device, &render_pass_cache] { + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, render_pass); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); + } } void GraphicsPipeline::Configure(bool is_indexed) { @@ -138,67 +149,72 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector image_view_indices; static_vector samplers; - texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeGraphicsDescriptors(); - const auto& regs{maxwell3d->regs}; + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache->UnbindGraphicsStorageBuffers(stage); + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); ++index; } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; for (const auto& desc : info.texture_descriptors) { const u32 cbuf_index{desc.cbuf_index}; const u32 cbuf_offset{desc.cbuf_offset}; ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory->Read(addr)}; + const u32 raw_handle{gpu_memory.Read(addr)}; const TextureHandle handle(raw_handle, via_header_index); image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache->UpdateGraphicsBuffers(is_indexed); - texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.UpdateGraphicsBuffers(is_indexed); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - buffer_cache->BindHostGeometryBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - buffer_cache->BindHostStageBuffers(stage); + buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - *texture_cache, *update_descriptor_queue, index); + texture_cache, update_descriptor_queue, index); } - texture_cache->UpdateRenderTargets(false); - scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); - - scheduler->BindGraphicsPipeline(*pipeline); + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + if (!building_flag.test()) { + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + if (scheduler.UpdateGraphicsPipeline(this)) { + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + }); + } if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); - scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ba1d34a83..4e0583157 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,13 +5,15 @@ #pragma once #include +#include +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -25,34 +27,34 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline() = default; explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, - TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + BufferCache& buffer_cache, TextureCache& texture_cache, + const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const FixedPipelineState& state, std::array stages, const std::array& infos); void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; - GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; private: - void MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass); + void MakePipeline(const Device& device, VkRenderPass render_pass); - Tegra::Engines::Maxwell3D* maxwell3d{}; - Tegra::MemoryManager* gpu_memory{}; - TextureCache* texture_cache{}; - BufferCache* buffer_cache{}; - VKScheduler* scheduler{}; - VKUpdateDescriptorQueue* update_descriptor_queue{}; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; + TextureCache& texture_cache; + BufferCache& buffer_cache; + VKScheduler& scheduler; + VKUpdateDescriptorQueue& update_descriptor_queue; + const FixedPipelineState state; std::array spv_modules; std::array stage_infos; @@ -61,6 +63,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30a707599..e3d9debf4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -518,9 +518,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - Common::ThreadWorker worker(11, "PipelineBuilder"); - std::mutex cache_mutex; struct { + std::mutex mutex; size_t total{0}; size_t built{0}; bool has_loaded{false}; @@ -542,51 +541,53 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } u32 num_envs{}; file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - auto envs{std::make_shared>(num_envs)}; - for (FileEnvironment& env : *envs) { + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { env.Deserialize(file); } - if (envs->front().ShaderStage() == Shader::Stage::Compute) { + if (envs.front().ShaderStage() == Shader::Stage::Compute) { ComputePipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; - ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; compute_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; - for (auto& env : *envs) { + for (auto& env : envs) { env_ptrs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } ++state.total; } { - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); state.has_loaded = true; } - worker.WaitForRequests(); + workers.WaitForRequests(); } size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -619,7 +620,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -662,10 +663,10 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateGraphicsPipeline(); - return &pipeline; + return pipeline.get(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -691,10 +692,10 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateComputePipeline(key, shader); - return &pipeline; + return pipeline.get(); } bool PipelineCache::RefreshStages() { @@ -743,9 +744,9 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr c return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, - const GraphicsPipelineCacheKey& key, - std::span envs) { +std::unique_ptr PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -783,12 +784,14 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, modules[stage_index].SetObjectNameEXT(name.c_str()); } } - return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, - std::move(modules), infos); + Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, + update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), + infos); } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { +std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; @@ -809,22 +812,22 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { generic_envs.push_back(&env); envs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); } return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader) { +std::unique_ptr PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(key, std::array{&env}, pipeline_cache_filename); @@ -832,9 +835,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineCacheKey& key, - Shader::Environment& env) const { +std::unique_ptr PipelineCache::CreateComputePipeline( + ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -846,8 +849,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; spv_module.SetObjectNameEXT(name.c_str()); } - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - std::move(spv_module)}; + Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + return std::make_unique(device, descriptor_pool, update_descriptor_queue, + thread_worker, program.info, std::move(spv_module)); } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b55e14189..609f00898 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -14,6 +14,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -145,16 +146,19 @@ private: const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - GraphicsPipeline CreateGraphicsPipeline(); + std::unique_ptr CreateGraphicsPipeline(); - GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs); + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel); - ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, - Shader::Environment& env) const; + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool build_in_parallel); Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); @@ -174,13 +178,15 @@ private: GraphicsPipelineCacheKey graphics_key{}; std::array shader_infos{}; - std::unordered_map compute_cache; - std::unordered_map graphics_cache; + std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; std::string pipeline_cache_filename; + + Common::ThreadWorker workers; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7d9927dd..f0bd4b8af 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -276,22 +276,11 @@ void RasterizerVulkan::DispatchCompute() { return; } std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - update_descriptor_queue.Acquire(); - pipeline->ConfigureBufferCache(buffer_cache); - pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); - const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; - const VkPipeline pipeline_handle{pipeline->Handle()}; - const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; - scheduler.Record( - [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, - descriptor_set, nullptr); - cmdbuf.Dispatch(dim[0], dim[1], dim[2]); - }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..25a4933e5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + Common::SetCurrentThreadName("yuzu:VulkanWorker"); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..a40bb8bcd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Update the pipeline to the current execution context. + bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -170,7 +171,7 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread(); -- cgit v1.2.3 From 8771639d1e97cf2224657c0d2ee87d800a784ac8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 03:15:28 -0300 Subject: vulkan: Create pipeline layouts in separate threads --- src/video_core/renderer_vulkan/pipeline_helper.h | 72 ++++++++++++---------- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 19 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 26 ++++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_update_descriptor.cpp | 4 +- .../renderer_vulkan/vk_update_descriptor.h | 2 +- 7 files changed, 65 insertions(+), 63 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 0a59aa659..eebe5d569 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -35,49 +35,52 @@ struct TextureHandle { u32 sampler; }; -struct DescriptorLayoutTuple { - vk::DescriptorSetLayout descriptor_set_layout; - vk::PipelineLayout pipeline_layout; - vk::DescriptorUpdateTemplateKHR descriptor_update_template; -}; - class DescriptorLayoutBuilder { public: - DescriptorLayoutTuple Create(const vk::Device& device) { - DescriptorLayoutTuple result; - if (!bindings.empty()) { - result.descriptor_set_layout = device.CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); + DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + + vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + if (bindings.empty()) { + return nullptr; } - result.pipeline_layout = device.CreatePipelineLayout({ + return device->CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + + vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) const { + if (entries.empty()) { + return nullptr; + } + return device->CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = pipeline_layout, + .set = 0, + }); + } + + vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + return device->CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, - .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .setLayoutCount = descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }); - if (!entries.empty()) { - result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *result.descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *result.pipeline_layout, - .set = 0, - }); - } - return result; } void Add(const Shader::Info& info, VkShaderStageFlags stage) { @@ -113,6 +116,7 @@ private: offset += sizeof(DescriptorUpdateEntry); } + const vk::Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a444d55d3..760857839 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -237,7 +237,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return nullptr; } const VkDescriptorSet set = descriptor_allocator->Commit(); - update_descriptor_queue.Send(*descriptor_template, set); + update_descriptor_queue.Send(descriptor_template.address(), set); return set; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 1c3249e3c..fb19bb4b9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -17,13 +17,6 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { -DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { - DescriptorLayoutBuilder builder; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - return builder.Create(device.GetLogical()); -} -} // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, @@ -31,10 +24,12 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip vk::ShaderModule spv_module_) : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutTuple tuple{CreateLayout(device, info)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); auto func{[this, &device] { @@ -128,7 +123,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ddc08b8c4..d17b79e02 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -27,8 +27,8 @@ using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder; +DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -39,7 +39,7 @@ DescriptorLayoutTuple CreateLayout(const Device& device, std::span @@ -124,13 +124,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - auto func{[this, &device, &render_pass_cache] { + auto func{[this, &device, &render_pass_cache, builder] { + const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; + pipeline_layout = builder.CreatePipelineLayout(set_layout); + descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); building_flag.test_and_set(); @@ -206,11 +208,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, - nullptr); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); }); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e3d9debf4..597261964 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -620,7 +620,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + workers(11, "yuzu:PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..bea9b8012 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,12 +36,12 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, +void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set) { const void* const data = upload_start; const vk::Device* const logical = &device.GetLogical(); scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, data); + logical->UpdateDescriptorSet(set, *update_template, data); }); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..82bc9920c 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,7 @@ public: void Acquire(); - void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); + void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ -- cgit v1.2.3 From d0a529683a2e5a693b53c6f24f6816c06f8f7e65 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 04:09:09 -0300 Subject: vulkan: Serialize pipelines on a separate thread --- .../renderer_vulkan/vk_pipeline_cache.cpp | 130 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 64 insertions(+), 67 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 597261964..79cd204c7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -61,6 +61,33 @@ public: ~GenericEnvironment() override = default; + u32 TextureBoundBuffer() const final { + return texture_bound; + } + + u32 LocalMemorySize() const final { + return local_memory_size; + } + + u32 SharedMemorySize() const final { + return shared_memory_size; + } + + std::array WorkgroupSize() const final { + return workgroup_size; + } + + u64 ReadInstruction(u32 address) final { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); + } + std::optional Analyze() { const std::optional size{TryFindSize()}; if (!size) { @@ -97,26 +124,10 @@ public: return Common::CityHash128(data.get(), size); } - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(ReadSize())}; - const auto data{std::make_unique(code_size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); - + const u64 code_size{static_cast(CachedSize())}; const u64 num_texture_types{static_cast(texture_types.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - const u32 local_memory_size{LocalMemorySize()}; - const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) @@ -124,10 +135,10 @@ public: .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) - .write(data.get(), code_size); + .write(reinterpret_cast(code.data()), code_size); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); @@ -137,8 +148,6 @@ public: .write(reinterpret_cast(&type), sizeof(type)); } if (stage == Shader::Stage::Compute) { - const std::array workgroup_size{WorkgroupSize()}; - const u32 shared_memory_size{SharedMemorySize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -220,6 +229,11 @@ protected: std::unordered_map texture_types; std::unordered_map cbuf_values; + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -270,6 +284,10 @@ public: UNREACHABLE_MSG("Invalid program={}", program); break; } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; } ~GraphicsEnvironment() override = default; @@ -294,24 +312,6 @@ public: cbuf.address, cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return maxwell3d->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const u64 size{sph.LocalMemorySize()}; - ASSERT(size <= std::numeric_limits::max()); - return static_cast(size); - } - - u32 SharedMemorySize() const override { - throw Shader::LogicError("Requesting shared memory size in graphics stage"); - } - - std::array WorkgroupSize() const override { - throw Shader::LogicError("Requesting workgroup size in a graphics stage"); - } - private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -325,7 +325,12 @@ public: u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } ~ComputeEnvironment() override = default; @@ -351,25 +356,6 @@ public: cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return kepler_compute->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.local_pos_alloc; - } - - u32 SharedMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.shared_alloc; - } - - std::array WorkgroupSize() const override { - const auto& qmd{kepler_compute->launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -621,7 +607,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder") { + workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -796,7 +782,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; - boost::container::static_vector generic_envs; boost::container::static_vector envs; const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; @@ -810,13 +795,22 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - generic_envs.push_back(&env); envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; - if (!pipeline_cache_filename.empty()) { - SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); - } + if (pipeline_cache_filename.empty()) { + return pipeline; + } + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + boost::container::static_vector + env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] != u128{}) { + env_ptrs.push_back(&envs[index]); + } + } + SerializePipeline(key, env_ptrs, pipeline_cache_filename); + }); return pipeline; } @@ -830,8 +824,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); } return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 609f00898..343ea1554 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -187,6 +187,7 @@ private: std::string pipeline_cache_filename; Common::ThreadWorker workers; + Common::ThreadWorker serialization_thread; }; } // namespace Vulkan -- cgit v1.2.3 From d819ba4489b90955286341c739083e638173b938 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 08:34:45 +0200 Subject: shader: Implement ViewportIndex --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 79cd204c7..1f308eec2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -631,6 +631,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, + .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 12f5f320985824d1ebad587ebecb0f8406143ebc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 09:21:53 +0200 Subject: shader: Mark SSBOs as written when they are --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index fb19bb4b9..6707842ab 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -75,7 +75,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d17b79e02..e8c3a5624 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -163,7 +163,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + desc.is_written); ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; -- cgit v1.2.3 From 480dc0d5e68fd1c79345e93216013a1d2e172c70 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:27:25 +0200 Subject: vk_pipeline_cache: Small fixes to the pipeline cache --- .../renderer_vulkan/vk_pipeline_cache.cpp | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1f308eec2..3111165fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,11 +539,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } else { @@ -558,11 +560,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } -- cgit v1.2.3 From 6ff2e9ba097f3619c21d2e547570e1fbaae8d6ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:19:13 -0300 Subject: vk_pipeline_cache: Remove unnecesary scope in pipeline cache locking --- .../renderer_vulkan/vk_pipeline_cache.cpp | 27 ++++++++++------------ 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3111165fb..f88ab67ae 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,13 +539,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - { - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { @@ -560,13 +558,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - { - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } @@ -635,7 +631,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, - .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_index_layer_non_geometry = + device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 5ed68e83db39e1f6790a625529f10f4e1d5a8f89 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 21:41:49 -0300 Subject: shader: Remove atomic flags and use mutex + cond variable for pipelines --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 15 ++++++++++----- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 7 ++++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 7 ++++++- 4 files changed, 32 insertions(+), 11 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6707842ab..0bb5b852d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -55,8 +55,9 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip .basePipelineHandle = 0, .basePipelineIndex = 0, }); - building_flag.test_and_set(); - building_flag.notify_all(); + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -75,7 +76,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); @@ -112,9 +114,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, update_descriptor_queue, image_index); - if (!building_flag.test()) { + if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } scheduler.Record([this](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 02da504f7..104e6cc85 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include "common/common_types.h" @@ -47,7 +49,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e8c3a5624..67de3cb79 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -135,8 +135,10 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); - building_flag.test_and_set(); - building_flag.notify_all(); + + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); @@ -196,8 +198,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); - if (!building_flag.test()) { - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } if (scheduler.UpdateGraphicsPipeline(this)) { scheduler.Record([this](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4e0583157..7d14d2378 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -63,7 +65,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan -- cgit v1.2.3 From 5b3c6d59c2c92ac388530740f8008f1b9764c14d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 22:28:07 -0300 Subject: vk_compute_pass: Fix compute passes --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 39 ++++++++++------------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 ++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - 3 files changed, 19 insertions(+), 23 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 760857839..2cfe9d4bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,27 +206,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); - /* - FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, .layout = *layout, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); - */ } VKComputePass::~VKComputePass() = default; @@ -262,8 +258,7 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -271,8 +266,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); @@ -319,8 +314,8 @@ std::pair QuadIndexedPass::Assemble( const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -329,9 +324,9 @@ std::pair QuadIndexedPass::Assemble( .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; const std::array push_constants = {base_vertex, index_shift}; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 67de3cb79..a0ef0e98b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -189,6 +189,8 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.BindHostGeometryBuffers(is_indexed); + update_descriptor_queue.Acquire(); + size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0bd4b8af..0292a1b94 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -172,7 +172,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { if (!pipeline) { return; } - update_descriptor_queue.Acquire(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); -- cgit v1.2.3 From 72daa2a039d58d23b0dca035bb5f6af8b10ce97b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 5 Apr 2021 08:56:58 +0200 Subject: shader: Fix ShadowCube declaration type, set number of pipeline threads based on hardware --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f88ab67ae..088de7001 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "common/bit_cast.h" @@ -607,7 +608,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { + workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"), + serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ -- cgit v1.2.3 From bfeeb23ddce9f3531a834c257bd8af05c42ed194 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 5 Apr 2021 19:15:45 -0300 Subject: vk_pipeline_cache: Fix num of pipeline workers on weird platforms --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 088de7001..25f592b8a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -608,7 +608,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"), + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; -- cgit v1.2.3 From 1f3eb601acdcdfa4c119cffbf36b5792147b893f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 02:56:15 -0300 Subject: shader: Implement texture buffers --- src/video_core/renderer_vulkan/pipeline_helper.h | 10 ++++++++++ src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 15 +++++++++------ src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 4 files changed, 29 insertions(+), 12 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index eebe5d569..decf0d32c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -93,6 +93,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } } private: @@ -146,6 +149,8 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { case Shader::TextureType::ColorArrayCube: case Shader::TextureType::ShadowArrayCube: return VideoCommon::ImageViewType::CubeArray; + case Shader::TextureType::Buffer: + break; } UNREACHABLE_MSG("Invalid texture type {}", type); return {}; @@ -161,6 +166,11 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samp update_descriptor_queue.AddSampledImage(vk_image_view, sampler); ++index; } + for (const auto& desc : info.texture_buffer_descriptors) { + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + ++index; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 0bb5b852d..9922cbd0f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -93,20 +93,23 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; const bool via_header_index{launch_desc.linked_tsc}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a0ef0e98b..afdd8b371 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,19 +169,23 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25f592b8a..23bf84a92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -212,7 +212,7 @@ protected: case Tegra::Texture::TextureType::Texture2DArray: return Shader::TextureType::ColorArray2D; case Tegra::Texture::TextureType::Texture1DBuffer: - throw Shader::NotImplementedException("Texture buffer"); + return Shader::TextureType::Buffer; case Tegra::Texture::TextureType::TextureCubeArray: return Shader::TextureType::ColorArrayCube; default: -- cgit v1.2.3 From e9a91bc5cc2c39b476ba8946f66930f5ab5608b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 20:14:55 -0300 Subject: shader: Interact texture buffers with buffer cache --- src/video_core/renderer_vulkan/pipeline_helper.h | 26 ++++----- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 57 ++++++++++++++------ src/video_core/renderer_vulkan/vk_buffer_cache.h | 18 +++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 30 +++++++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 ++++++++--- .../renderer_vulkan/vk_texture_cache.cpp | 63 ++++------------------ src/video_core/renderer_vulkan/vk_texture_cache.h | 30 +++++------ 7 files changed, 140 insertions(+), 115 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index decf0d32c..cff93cc60 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,7 +24,8 @@ struct TextureHandle { [[likely]] if (via_header_index) { image = data; sampler = data; - } else { + } + else { const Tegra::Texture::TextureHandle handle{data}; image = handle.tic_id; sampler = via_header_index ? image : handle.tsc_id.Value(); @@ -90,12 +91,12 @@ public: for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); } + for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } } private: @@ -156,20 +157,15 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { return {}; } -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, - const ImageId* image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + image_view_ids += info.texture_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{samplers[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - ++index; - } - for (const auto& desc : info.texture_buffer_descriptors) { - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - ++index; } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..cdda56ab1 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -67,25 +67,50 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); + : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, + buffer{device->GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = SizeBytes(), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + })}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..ea17406dc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,6 +9,7 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector views; }; class BufferCacheRuntime { @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -123,6 +140,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9922cbd0f..ac47b1f3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -80,8 +80,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, desc.is_written); ++ssbo_index; } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); texture_cache.SynchronizeComputeDescriptors(); @@ -99,6 +97,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -106,16 +108,26 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t image_index{}; - PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - update_descriptor_queue, image_index); + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + for (const auto& desc : info.texture_buffer_descriptors) { + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), + image_view.format); + ++texture_buffer_ids; + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index afdd8b371..893258b4a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,6 +175,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -182,24 +186,37 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + size_t index{}; for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format); + ++index; + ++texture_buffer_index; } + texture_buffer_index += info.texture_descriptors.size(); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); - size_t index{}; + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - texture_cache, update_descriptor_queue, index); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); } texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1bbc542a1..e42b091c5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,10 +15,10 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -162,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -813,13 +794,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -828,11 +805,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { - buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -884,19 +857,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -1032,19 +992,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 189ee5a68..498e76a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -41,9 +41,9 @@ struct TextureCacheRuntime { void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -54,7 +54,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -92,8 +92,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); - void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(const StagingBufferRef& map, std::span copies); @@ -101,10 +99,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -121,7 +115,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector storage_image_views; @@ -132,6 +125,8 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); @@ -142,10 +137,6 @@ public: return *image_views[static_cast(query_type)]; } - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; - } - [[nodiscard]] VkImage ImageHandle() const noexcept { return image_handle; } @@ -162,6 +153,14 @@ public: return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); @@ -169,11 +168,12 @@ private: std::array image_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +++++++++++---------- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +++-- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 -- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4a..57e2d569c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92..fcebb8f6e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab..991afe521 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include #include diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5..70328680d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] std::vector TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); std::ranges::transform( -- cgit v1.2.3 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 104e6cc85..8efdc2926 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include #include "common/common_types.h" #include "common/thread_worker.h" -- cgit v1.2.3 From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: shader: Implement SULD and SUST --- src/video_core/renderer_vulkan/blit_image.cpp | 4 +- src/video_core/renderer_vulkan/pipeline_helper.h | 43 +++----- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 112 +++++++++++++++++---- src/video_core/renderer_vulkan/vk_texture_cache.h | 23 +++-- 7 files changed, 127 insertions(+), 65 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c0d5c7f4..39fe9289b 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -361,7 +361,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); @@ -435,7 +435,7 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index cff93cc60..d2c3f11c1 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,6 +97,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.image_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); + } } private: @@ -127,36 +130,6 @@ private: size_t offset{}; }; -inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - case Shader::TextureType::Buffer: - break; - } - UNREACHABLE_MSG("Invalid texture type {}", type); - return {}; -} - inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { @@ -164,9 +137,17 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); } + for (const auto& desc : info.image_descriptors) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ac47b1f3c..3d690f335 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -108,6 +108,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 57e2d569c..23c01f24e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -186,6 +186,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0292a1b94..2ba44330f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -494,7 +494,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 70328680d..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -215,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return VK_COMPONENT_SWIZZLE_ZERO; } +[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + return VK_IMAGE_VIEW_TYPE_1D; + case Shader::TextureType::Color2D: + return VK_IMAGE_VIEW_TYPE_2D; + case Shader::TextureType::ColorCube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case Shader::TextureType::Color3D: + return VK_IMAGE_VIEW_TYPE_3D; + case Shader::TextureType::ColorArray1D: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case Shader::TextureType::ColorArray2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case Shader::TextureType::ColorArrayCube: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case Shader::TextureType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} + [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { switch (type) { case VideoCommon::ImageViewType::e1D: @@ -232,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { case VideoCommon::ImageViewType::CubeArray: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case VideoCommon::ImageViewType::Rect: - LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + UNIMPLEMENTED_MSG("Rect image view"); return VK_IMAGE_VIEW_TYPE_2D; case VideoCommon::ImageViewType::Buffer: UNREACHABLE_MSG("Texture buffers can't be image views"); @@ -539,6 +563,28 @@ struct RangedBarrierRange { } }; +[[nodiscard]] VkFormat Format(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return VK_FORMAT_R8_SINT; + case Shader::ImageFormat::R8_UINT: + return VK_FORMAT_R8_UINT; + case Shader::ImageFormat::R16_UINT: + return VK_FORMAT_R16_UINT; + case Shader::ImageFormat::R16_SINT: + return VK_FORMAT_R16_SINT; + case Shader::ImageFormat::R32_UINT: + return VK_FORMAT_R32_UINT; + case Shader::ImageFormat::R32G32_UINT: + return VK_FORMAT_R32G32_UINT; + case Shader::ImageFormat::R32G32B32A32_UINT: + return VK_FORMAT_R32G32B32A32_UINT; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return VK_FORMAT_R32_UINT; +} } // Anonymous namespace void TextureCacheRuntime::Finish() { @@ -577,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } } - ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == Fermi2D::Operation::SrcCopy); @@ -915,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span swizzle{ SwizzleSource::R, @@ -954,39 +1001,39 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; - const auto create = [&](VideoCommon::ImageViewType view_type, std::optional num_layers) { + const auto create = [&](TextureType tex_type, std::optional num_layers) { VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(view_type); + ci.viewType = ImageViewType(tex_type); if (num_layers) { ci.subresourceRange.layerCount = *num_layers; } vk::ImageView handle = device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } - image_views[static_cast(view_type)] = std::move(handle); + image_views[static_cast(tex_type)] = std::move(handle); }; switch (info.type) { case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1DArray: - create(VideoCommon::ImageViewType::e1D, 1); - create(VideoCommon::ImageViewType::e1DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e1DArray); + create(TextureType::Color1D, 1); + create(TextureType::ColorArray1D, std::nullopt); + render_target = Handle(TextureType::ColorArray1D); break; case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2DArray: - create(VideoCommon::ImageViewType::e2D, 1); - create(VideoCommon::ImageViewType::e2DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e2DArray); + create(TextureType::Color2D, 1); + create(TextureType::ColorArray2D, std::nullopt); + render_target = Handle(Shader::TextureType::ColorArray2D); break; case VideoCommon::ImageViewType::e3D: - create(VideoCommon::ImageViewType::e3D, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e3D); + create(TextureType::Color3D, std::nullopt); + render_target = Handle(Shader::TextureType::Color3D); break; case VideoCommon::ImageViewType::Cube: case VideoCommon::ImageViewType::CubeArray: - create(VideoCommon::ImageViewType::Cube, 6); - create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + create(TextureType::ColorCube, 6); + create(TextureType::ColorArrayCube, std::nullopt); break; case VideoCommon::ImageViewType::Rect: UNIMPLEMENTED(); @@ -1009,7 +1056,8 @@ VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; } - depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); return *depth_view; } @@ -1017,18 +1065,38 @@ VkImageView ImageView::StencilView() { if (stencil_view) { return *stencil_view; } - stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); return *stencil_view; } -vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +VkImageView ImageView::StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + auto& view{views[static_cast(texture_type)]}; + if (view) { + return *view; + } + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); + return *view; +} + +vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = image_handle, .viewType = ImageViewType(type), - .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, + .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 498e76a1c..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -133,8 +134,11 @@ public: [[nodiscard]] VkImageView StencilView(); - [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { - return *image_views[static_cast(query_type)]; + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { + return *image_views[static_cast(texture_type)]; } [[nodiscard]] VkImage ImageHandle() const noexcept { @@ -145,10 +149,6 @@ public: return render_target; } - [[nodiscard]] PixelFormat ImageFormat() const noexcept { - return image_format; - } - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } @@ -162,15 +162,20 @@ public: } private: - [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + struct StorageViews { + std::array signeds; + std::array unsigneds; + }; + + [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; - std::array image_views; + std::array image_views; + std::unique_ptr storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; - PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; GPUVAddr gpu_addr = 0; u32 buffer_size = 0; -- cgit v1.2.3 From e5e79648cfa3ac9d30c00bccf4252cd0dc93bccc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Jul 2021 18:16:56 -0300 Subject: pipeline_helper: Add missing [[maybe_unused]] --- src/video_core/renderer_vulkan/pipeline_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index d2c3f11c1..a39459b2e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,7 +97,7 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.image_descriptors) { + for ([[maybe_unused]] const auto& desc : info.image_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); } } -- cgit v1.2.3 From 1030b612a36f6b44e3b25215039748b01cfb9b8c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:12:56 -0300 Subject: vk_rasterizer: Request outside render pass execution context for compute --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2ba44330f..7df169c85 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -279,6 +279,7 @@ void RasterizerVulkan::DispatchCompute() { const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } -- cgit v1.2.3 From ab543f18213133b3076b81f30df386d5cb470e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:37:03 -0300 Subject: spirv: Guard against typeless image reads on unsupported devices --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fcebb8f6e..25dbefd5c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -635,6 +635,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 479ca00071ccaab6ca9ac28daf375e1ed15dc447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:50:30 -0300 Subject: nsight_aftermath_tracker: Report used shaders to Nsight Aftermath --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2cfe9d4bd..ec9866605 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25dbefd5c..f699a9bdf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -770,6 +770,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; + device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], @@ -846,7 +847,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program, binding)}; + device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f699a9bdf..b953d694b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, device.IsExtShaderViewportIndexLayerSupported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, -- cgit v1.2.3 From a33014022ed86c27ba4faa243fa6d0a69df75564 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:57:37 -0300 Subject: pipeline_helper: Simplify descriptor objects initialization --- src/video_core/renderer_vulkan/pipeline_helper.h | 58 ++++++++++-------------- 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index a39459b2e..e2167dc4b 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,42 +85,34 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); - } - for ([[maybe_unused]] const auto& desc : info.image_descriptors) { - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); - } + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage) { - bindings.push_back({ - .binding = binding, - .descriptorType = type, - .descriptorCount = 1, - .stageFlags = stage, - .pImmutableSamplers = nullptr, - }); - entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = type, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); + void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + for (size_t i = 0; i < num; ++i) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } } const vk::Device* device{}; -- cgit v1.2.3 From f263760c5a3aff771123b32b15677e1f7a089640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:22 -0300 Subject: shader: Implement geometry shaders --- .../renderer_vulkan/vk_pipeline_cache.cpp | 56 +++++++++++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 7 ++- 2 files changed, 56 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b953d694b..f49add208 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -769,7 +769,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program.stage)}; + const Shader::Profile profile{MakeProfile(key, program)}; const std::vector code{EmitSPIRV(profile, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA } Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - Shader::Stage stage) { + const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; - if (stage == Shader::Stage::VertexB) { - profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = Common::BitCast(key.state.point_size); + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + profile.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + profile.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + profile.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + profile.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + profile.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; } return profile; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 343ea1554..8b6839966 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -33,6 +33,10 @@ namespace Core { class System; } +namespace Shader::IR { +struct Program; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -160,7 +164,8 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; -- cgit v1.2.3 From a83579b50a167ab9483e5058fd1c748018ef6d7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 16:56:22 -0300 Subject: shader: Implement early Z tests --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f49add208..8a59a2611 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -934,6 +934,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, profile.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + profile.force_early_z = key.state.early_z != 0; return profile; } -- cgit v1.2.3 From b126987c59964d81ae3705ad7ad6c0ace8714e19 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 01:04:59 -0300 Subject: shader: Implement transform feedbacks and define file format --- .../renderer_vulkan/fixed_pipeline_state.cpp | 19 +++- .../renderer_vulkan/fixed_pipeline_state.h | 26 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 118 ++++++++++++++++++++- 3 files changed, 156 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d8f683907..6a3baf837 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -52,6 +52,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; + no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -113,10 +115,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (!has_extended_dynamic_state) { - no_extended_dynamic_state.Assign(1); + if (no_extended_dynamic_state != 0) { dynamic_state.Refresh(regs); } + if (xfb_enabled != 0) { + xfb_state.Refresh(regs); + } } void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { @@ -158,6 +162,17 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } +void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { + return Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + varyings = regs.tfb_varying_locs; +} + void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 348f1d6ce..5568c4f72 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -130,6 +130,18 @@ struct FixedPipelineState { } }; + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + + void Refresh(const Maxwell& regs); + }; + struct DynamicState { union { u32 raw1; @@ -168,6 +180,7 @@ struct FixedPipelineState { union { u32 raw1; BitField<0, 1, u32> no_extended_dynamic_state; + BitField<1, 1, u32> xfb_enabled; BitField<2, 1, u32> primitive_restart_enable; BitField<3, 1, u32> depth_bias_enable; BitField<4, 1, u32> depth_clamp_disabled; @@ -199,6 +212,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; + TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); @@ -211,8 +225,16 @@ struct FixedPipelineState { } size_t Size() const noexcept { - const size_t total_size = sizeof *this; - return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); + if (xfb_enabled != 0) { + // When transform feedback is enabled, use the whole struct + return sizeof(*this); + } else if (no_extended_dynamic_state != 0) { + // Dynamic state is enabled, we can enable more + return offsetof(FixedPipelineState, xfb_state); + } else { + // No XFB, extended dynamic state enabled + return offsetof(FixedPipelineState, dynamic_state); + } } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8a59a2611..de52d0f30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -248,6 +248,10 @@ namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +// TODO: Move this to a separate file +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION{1}; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; @@ -379,13 +383,14 @@ void SerializePipeline(const Key& key, const Envs& envs, const std::string& file try { std::ofstream file; file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); if (!file.is_open()) { LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); return; } if (file.tellp() == 0) { - // Write header... + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); } const std::span key_span(reinterpret_cast(&key), sizeof(key)); SerializePipeline(key_span, MakeSpan(envs), file); @@ -520,8 +525,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading file.exceptions(std::ifstream::failbit); const auto end{file.tellg()}; file.seekg(0, std::ios::beg); - // Read header... + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::Delete(pipeline_cache_filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Render_Vulkan, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + pipeline_cache_filename); + } + return; + } while (file.tellg() != end) { if (stop_loading) { return; @@ -879,6 +903,88 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +static std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; @@ -893,6 +999,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), @@ -902,6 +1011,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; break; default: -- cgit v1.2.3 From 6c512f4bffde6bd8e4dbc74ed27cc84cd7fffadb Mon Sep 17 00:00:00 2001 From: ameerj Date: Wed, 14 Apr 2021 00:32:18 -0400 Subject: spirv: Implement alpha test --- .../renderer_vulkan/vk_pipeline_cache.cpp | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index de52d0f30..80f196d0e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -492,6 +492,37 @@ private: u32 read_lowest{}; u32 read_highest{}; }; + +Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return Shader::CompareFunction::Never; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return Shader::CompareFunction::Less; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return Shader::CompareFunction::Equal; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return Shader::CompareFunction::LessThanEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return Shader::CompareFunction::Greater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return Shader::CompareFunction::NotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return Shader::CompareFunction::GreaterThanEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return Shader::CompareFunction::Always; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); + return {}; +} } // Anonymous namespace void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -1016,6 +1047,11 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, } profile.convert_depth_mode = gl_ndc; break; + case Shader::Stage::Fragment: + profile.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; default: break; } -- cgit v1.2.3 From 416e1b7441d34512fcb0ffed014daf7ca4bb62bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:36:36 -0300 Subject: spirv: Implement image buffers --- src/video_core/renderer_vulkan/pipeline_helper.h | 2 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 25 ++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 +++++++++++++--------- 3 files changed, 37 insertions(+), 21 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index e2167dc4b..aaf9a735e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -88,6 +88,7 @@ public: Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } @@ -126,6 +127,7 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { image_view_ids += info.texture_buffer_descriptors.size(); + image_view_ids += info.image_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3d690f335..3c907ec5a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -97,10 +97,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -108,24 +110,29 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); buffer_cache.UnbindComputeTextureBuffers(); ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format); + image_view.format, is_written); ++texture_buffer_ids; ++index; - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 23c01f24e..84720a6f9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,10 +175,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -186,28 +188,33 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format); + image_view.BufferSize(), image_view.format, + is_written); ++index; ++texture_buffer_index; - } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); texture_buffer_index += info.texture_descriptors.size(); + texture_buffer_index += info.image_descriptors.size(); } buffer_cache.UpdateGraphicsBuffers(is_indexed); -- cgit v1.2.3 From 7ae3ea6beefae76e6671436114863fce1baacd9e Mon Sep 17 00:00:00 2001 From: lat9nq Date: Thu, 15 Apr 2021 19:01:45 -0400 Subject: vk_pipeline_cache: Silence GCC warnings Silences `-Werror=missing-field-initializers` due to missing initializers. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 80f196d0e..ee22255bf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -696,6 +696,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, + .alpha_test_func{}, + .xfb_varyings{}, }; } -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 13 ++++++++++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 30 ++++++++++++++++++++++ .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- 5 files changed, 48 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index dc4ff0da2..8f0b0b8ec 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -685,6 +685,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 9f78e15b6..50a599c11 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -65,6 +65,8 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 84720a6f9..d5e9dae0f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -355,7 +355,8 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ee22255bf..0bccc640a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1040,6 +1040,36 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + profile.tess_clockwise = key.state.tessellation_clockwise == 0; + profile.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + profile.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; case Shader::Stage::Geometry: if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, -- cgit v1.2.3 From e3514bcd6b09f623da14c4f3c4ffd988e75577ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:31:15 -0300 Subject: spirv: Implement ViewportMask with NV_viewport_array2 --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0bccc640a..4d0d3ebb7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -690,6 +690,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), -- cgit v1.2.3 From f18a6dd1bdaffda4c3e771af3cf7cf41919ebd67 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 16 Apr 2021 23:52:58 +0200 Subject: shader: Implement SR_Y_DIRECTION --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 2 ++ src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 3 files changed, 4 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 6a3baf837..24834e0f7 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -82,6 +82,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 5568c4f72..31de6b2c8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -202,6 +202,7 @@ struct FixedPipelineState { BitField<3, 1, u32> early_z; BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; + BitField<10, 1, u32> y_negate; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4d0d3ebb7..e9b93336b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1116,6 +1116,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, break; } profile.force_early_z = key.state.early_z != 0; + profile.y_negate = key.state.y_negate != 0; return profile; } -- cgit v1.2.3 From dd860b684c7695097107c1186e96a70e754e5990 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 19:48:45 -0300 Subject: shader: Implement D3D samplers --- .../renderer_vulkan/vk_compute_pipeline.cpp | 31 ++++++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 25 ++++++++++++----- .../renderer_vulkan/vk_pipeline_cache.cpp | 32 ++++++++-------------- 3 files changed, 51 insertions(+), 37 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3c907ec5a..45d837ca4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -88,23 +88,34 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, boost::container::static_vector image_view_indices; boost::container::static_vector samplers; - const auto& launch_desc{kepler_compute.launch_description}; - const auto& cbufs{launch_desc.const_buffer_config}; - const bool via_header_index{launch_desc.linked_tsc}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d5e9dae0f..08f00b9ce 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,20 +169,31 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(cbufs[cbuf_index].enabled); - const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto read_handle{[&](const auto& desc) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e9b93336b..4317b2ac7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -188,9 +188,7 @@ protected: } Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, - u32 cbuf_offset) { - const u32 raw{cbuf_offset < cbuf_size ? gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + u32 raw) { const TextureHandle handle{raw, via_header_index}; const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; @@ -219,7 +217,7 @@ protected: throw Shader::NotImplementedException("Unknown texture type"); } }()}; - texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + texture_types.emplace(raw, result); return result; } @@ -227,7 +225,7 @@ protected: GPUVAddr program_base{}; std::vector code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; u32 local_memory_size{}; @@ -250,7 +248,7 @@ using Shader::Maxwell::TranslateProgram; // TODO: Move this to a separate file constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{1}; +constexpr u32 CACHE_VERSION{2}; class GraphicsEnvironment final : public GenericEnvironment { public: @@ -308,13 +306,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{maxwell3d->regs}; - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, - cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); } private: @@ -352,13 +347,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, - cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); } private: @@ -421,7 +413,7 @@ public: code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); for (size_t i = 0; i < num_texture_types; ++i) { - u64 key; + u32 key; Shader::TextureType type; file.read(reinterpret_cast(&key), sizeof(key)) .read(reinterpret_cast(&type), sizeof(type)); @@ -457,8 +449,8 @@ public: return it->second; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + Shader::TextureType ReadTextureType(u32 handle) override { + const auto it{texture_types.find(handle)}; if (it == texture_types.end()) { throw Shader::LogicError("Uncached read texture type"); } @@ -483,7 +475,7 @@ public: private: std::unique_ptr code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; -- cgit v1.2.3 From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: shader: Move microinstruction header to the value header --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8b6839966..e12e4422f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -16,7 +16,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -- cgit v1.2.3 From d10cf55353175b13bed4cf18791e080ecb7fd95b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:17:59 -0300 Subject: shader: Implement indexed textures --- src/video_core/renderer_vulkan/pipeline_helper.h | 50 ++++++++++------- .../renderer_vulkan/vk_compute_pipeline.cpp | 46 +++++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 63 +++++++++++++--------- 3 files changed, 95 insertions(+), 64 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index aaf9a735e..dd7d2cc0c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,28 +85,30 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + template + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; for (size_t i = 0; i < num; ++i) { bindings.push_back({ .binding = binding, .descriptorType = type, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .stageFlags = stage, .pImmutableSamplers = nullptr, }); entries.push_back({ .dstBinding = binding, .dstArrayElement = 0, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .descriptorType = type, .offset = offset, .stride = sizeof(DescriptorUpdateEntry), @@ -126,21 +128,29 @@ private: inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { - image_view_ids += info.texture_buffer_descriptors.size(); - image_view_ids += info.image_buffer_descriptors.size(); + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(desc.type)}; - update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + } } for (const auto& desc : info.image_descriptors) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - if (desc.is_written) { - texture_cache.MarkModification(image_view.image_id); + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); } - const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - update_descriptor_queue.AddImage(vk_image_view); } } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 45d837ca4..6e9f66262 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -91,35 +91,41 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - desc.secondary_cbuf_offset}; + secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TextureHandle{lhs_raw | rhs_raw, via_header_index}; } } return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - samplers.push_back(sampler->Handle()); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); @@ -130,16 +136,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++texture_buffer_ids; + ++index; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); - buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format, is_written); - ++texture_buffer_ids; - ++index; }}; std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 08f00b9ce..b7688aef9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -161,23 +161,26 @@ void GraphicsPipeline::Configure(bool is_indexed) { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++index; + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); - const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + - desc.secondary_cbuf_offset}; + second_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; @@ -187,17 +190,21 @@ void GraphicsPipeline::Configure(bool is_indexed) { return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); } @@ -208,24 +215,30 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++index; + ++texture_buffer_index; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written); - ++index; - ++texture_buffer_index; }}; const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsTextureBuffers(stage); std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - texture_buffer_index += info.texture_descriptors.size(); - texture_buffer_index += info.image_descriptors.size(); + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } buffer_cache.UpdateGraphicsBuffers(is_indexed); -- cgit v1.2.3 From 7a1f296cda32bdb8996f25fd1862b422ac2bfe48 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 21:05:10 -0300 Subject: shader: Fix render targets with null attachments --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 18 +++++++--- .../renderer_vulkan/vk_render_pass_cache.cpp | 42 +++++++++++----------- 2 files changed, 34 insertions(+), 26 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b7688aef9..e43db280f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -105,6 +105,17 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); return key; } + +size_t NumAttachments(const FixedPipelineState& state) { + size_t num{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const auto format{static_cast(state.color_formats[index])}; + if (format != Tegra::RenderTargetFormat::NONE) { + num = index + 1; + } + } + return num; +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -418,17 +429,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const size_t num_attachments{NumAttachments(state)}; + for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto format{static_cast(state.color_formats[index])}; - if (format == Tegra::RenderTargetFormat::NONE) { - continue; - } const auto& blend{state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 991afe521..451ffe019 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -16,18 +16,6 @@ namespace Vulkan { namespace { using VideoCore::Surface::PixelFormat; -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, VkSampleCountFlagBits samples) { using MaxwellToVK::SurfaceFormat; @@ -54,17 +42,29 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; + std::array references{}; + u32 num_attachments{}; + u32 num_colors{}; for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; - if (format == PixelFormat::Invalid) { - continue; + const bool is_valid{format != PixelFormat::Invalid}; + references[index] = VkAttachmentReference{ + .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (is_valid) { + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + num_attachments = static_cast(index + 1); + ++num_colors; } - descriptions.push_back(AttachmentDescription(*device, format, key.samples)); } - const size_t num_colors{descriptions.size()}; - const VkAttachmentReference* depth_attachment{}; + const bool has_depth{key.depth_format != PixelFormat::Invalid}; + VkAttachmentReference depth_reference{}; if (key.depth_format != PixelFormat::Invalid) { - depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + depth_reference = VkAttachmentReference{ + .attachment = num_colors, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); } const VkSubpassDescription subpass{ @@ -72,10 +72,10 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputAttachmentCount = 0, .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .colorAttachmentCount = num_attachments, + .pColorAttachments = references.data(), .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, + .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr, .preserveAttachmentCount = 0, .pPreserveAttachments = nullptr, }; -- cgit v1.2.3 From 2dc86372c76afb134651499452bb5074b6d1e839 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Fri, 23 Apr 2021 02:38:02 -0300 Subject: shader: Fix bugs and build issues on GCC --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6e9f66262..6611c1de3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -95,7 +95,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; index < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e43db280f..a8b402253 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -226,7 +226,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { -- cgit v1.2.3 From 5b1b06f11e4520ec9d0b7864dc822daea3e3be0c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:33:21 -0300 Subject: vk_graphics_pipeline: Guard against non-tessellation pipelines using patches --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a8b402253..2bc1f67ae 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -345,12 +345,18 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { + if (!spv_modules[1] && !spv_modules[2]) { + LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); + input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + } const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .topology = input_assembly_topology, .primitiveRestartEnable = state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; -- cgit v1.2.3 From 8fda599a316b7b3a5e017cb01db1e9c021ce7654 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 21:24:30 -0300 Subject: vk_compute_pipeline: Fix index comparison oversight on compute texture buffers --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6611c1de3..990ead575 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 i = 0; index < desc.count; ++i) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; -- cgit v1.2.3 From f4ace63957ee47c4e3e913954f07375d0391beae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:27:25 -0300 Subject: shader: Accelerate pipeline transitions and use dirty flags for shaders --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 +++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.h | 54 +++++++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 31 ++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 ++---------- 4 files changed, 105 insertions(+), 56 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2bc1f67ae..100a5e07a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state_, + const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, - update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ - std::move(stages)} { + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } } +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); +} + void GraphicsPipeline::Configure(bool is_indexed) { static constexpr size_t max_images_elements = 64; std::array image_view_ids; @@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { - dynamic = state.dynamic_state; + dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; + const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast(index), @@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), - .divisor = state.binding_divisors[index], + .divisor = key.state.binding_divisors[index], }); } } static_vector vertex_attributes; const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; if (!attribute.enabled || !input_attributes[index].used) { continue; } @@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { if (!spv_modules[1] && !spv_modules[2]) { LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); @@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, @@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pScissors = nullptr, }; std::array swizzles; - std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = - MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - const size_t num_attachments{NumAttachments(state)}; + const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, @@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend{state.attachments[index]}; + const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7d14d2378..fd787840b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,10 +4,12 @@ #pragma once +#include #include #include #include #include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -20,6 +22,39 @@ namespace Vulkan { +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; class RenderPassCache; class VKScheduler; @@ -35,7 +70,8 @@ public: const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, const FixedPipelineState& state, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); @@ -47,16 +83,30 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; + void AddTransition(GraphicsPipeline* transition); + + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] + : nullptr; + } + private: void MakePipeline(const Device& device, VkRenderPass render_pass); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - const FixedPipelineState state; + + std::vector transition_keys; + std::vector transitions; std::array spv_modules; std::array stage_infos; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4317b2ac7..2bd870060 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (!RefreshStages()) { + current_pipeline = nullptr; return nullptr; } graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return current_pipeline; + } + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; - if (!is_new) { - return pipeline.get(); + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - pipeline = CreateGraphicsPipeline(); - return pipeline.get(); + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return current_pipeline; } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { } bool PipelineCache::RefreshStages() { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_valid_shaders; + } + dirty[VideoCommon::Dirty::Shaders] = false; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { @@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() { const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_valid_shaders = false; return false; } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; @@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() { shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } + last_valid_shaders = true; return true; } @@ -832,8 +852,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), - infos); + update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e12e4422f..ad569acc4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -struct GraphicsPipelineCacheKey { - std::array unique_hashes; - FixedPipelineState state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - size_t Size() const noexcept { - return sizeof(unique_hashes) + state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - } // namespace Vulkan namespace std { @@ -89,13 +69,6 @@ struct hash { } }; -template <> -struct hash { - size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - } // namespace std namespace Vulkan { @@ -181,7 +154,10 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + std::array shader_infos{}; + bool last_valid_shaders{}; std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 5ed871398b0e89cb3f2e3eb740d431f5faaa12e4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:28:02 -0300 Subject: vk_graphics_pipeline: Generate specialized pipeline config functions and improve code --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 239 ++++++++++++++++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 22 +- 2 files changed, 230 insertions(+), 31 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 100a5e07a..674226cb7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,14 +19,24 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" +#ifdef _MSC_VER +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace Vulkan { namespace { using boost::container::small_vector; using boost::container::static_vector; +using Shader::ImageBufferDescriptor; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; +constexpr size_t MAX_IMAGE_ELEMENTS = 64; + DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { @@ -116,6 +126,80 @@ size_t NumAttachments(const FixedPipelineState& state) { } return num; } + +template +bool Passes(const std::array& modules, + const std::array& stage_infos) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (!Spec::enabled_stages[stage] && modules[stage]) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& modules, + const std::array& stage_infos) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(modules, stage_infos)) { + return FindSpec(modules, stage_infos); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array& modules, + const std::array& infos) { + return FindSpec(modules, infos); +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -144,6 +228,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + Validate(); MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -155,6 +240,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } else { func(); } + configure_func = ConfigureFunc(spv_modules, stage_infos); } void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { @@ -162,26 +248,29 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { transitions.push_back(transition); } -void GraphicsPipeline::Configure(bool is_indexed) { - static constexpr size_t max_images_elements = 64; - std::array image_view_ids; - static_vector image_view_indices; - static_vector samplers; +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; const auto read_handle{[&](const auto& desc, u32 index) { @@ -207,33 +296,60 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index++] = handle.image; } }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_image); - std::ranges::for_each(info.image_buffer_descriptors, add_image); + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + samplers[image_index] = sampler->Handle(); + ++image_index; + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); } } - std::ranges::for_each(info.image_descriptors, add_image); + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; @@ -245,29 +361,75 @@ void GraphicsPipeline::Configure(bool is_indexed) { } }}; const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { + buffer_cache.UnbindGraphicsTextureBuffers(stage); + } + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } for (const auto& desc : info.texture_descriptors) { texture_buffer_index += desc.count; } - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); } - buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); const VkSampler* samplers_it{samplers.data()}; const ImageId* views_it{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, update_descriptor_queue); + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + ConfigureDraw(); +} + +void GraphicsPipeline::ConfigureDraw() { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -550,4 +712,23 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa }); } +void GraphicsPipeline::Validate() { + size_t num_images{}; + for (const auto& info : stage_infos) { + for (const auto& desc : info.texture_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_images <= MAX_IMAGE_ELEMENTS); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index fd787840b..edab5703f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -75,8 +75,6 @@ public: std::array stages, const std::array& infos); - void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -85,6 +83,10 @@ public: void AddTransition(GraphicsPipeline* transition); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; @@ -94,9 +96,23 @@ public: : nullptr; } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: + template + void ConfigureImpl(bool is_indexed); + + void ConfigureDraw(); + void MakePipeline(const Device& device, VkRenderPass render_pass); + void Validate(); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; @@ -105,6 +121,8 @@ private: VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; + void (*configure_func)(GraphicsPipeline*, bool); + std::vector transition_keys; std::vector transitions; -- cgit v1.2.3 From 2f3c3dfc10a318f63862c4976f0608ea50c19387 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Apr 2021 00:15:32 -0300 Subject: vulkan: Rework descriptor allocation algorithm Create multiple descriptor pools on demand. There are some degrees of freedom what is considered a compatible pool to avoid wasting large pools on small descriptors. --- src/video_core/renderer_vulkan/blit_image.cpp | 19 +- src/video_core/renderer_vulkan/blit_image.h | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 4 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 200 +++++++++++---------- src/video_core/renderer_vulkan/vk_compute_pass.h | 28 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 +- .../renderer_vulkan/vk_compute_pipeline.h | 2 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 172 +++++++++++++----- .../renderer_vulkan/vk_descriptor_pool.h | 62 +++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 6 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- 15 files changed, 314 insertions(+), 197 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 39fe9289b..4058f62cd 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA .bindingCount = 1, .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, }; +template +inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 0, + .texture_buffers = 0, + .image_buffers = 0, + .textures = num_textures, + .images = 0, + .score = 2, +}; constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -326,14 +336,16 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, - StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + StateTracker& state_tracker_, DescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), - one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), - two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_descriptor_allocator{ + descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, + two_textures_descriptor_allocator{ + descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(one_texture_set_layout.address()))), two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( @@ -415,7 +427,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { - ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, - StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index cdda56ab1..568993c58 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -116,7 +116,7 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool) + DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index ea17406dc..c52001b5a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -16,7 +16,7 @@ namespace Vulkan { class Device; -class VKDescriptorPool; +class DescriptorPool; class VKScheduler; class BufferCacheRuntime; @@ -61,7 +61,7 @@ public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool); + DescriptorPool& descriptor_pool); void Finish(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index ec9866605..e2f3d16bf 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; constexpr size_t ASTC_NUM_BINDINGS = 4; -VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - return { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = static_cast(size), - }; -} - -std::array BuildInputOutputDescriptorSetBindings() { - return {{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +template +inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast(size), +}; -std::array BuildASTCDescriptorSetBindings() { - return {{ - { - .binding = ASTC_BINDING_INPUT_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_OUTPUT_IMAGE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +constexpr std::array INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 2, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 0, + .score = 2, +}; -VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 2, +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_ENC_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_SWIZZLE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_OUTPUT_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo ASTC_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 3, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 1, + .score = 4, +}; + +constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), +}; -std::array -BuildASTCPassDescriptorUpdateTemplateEntry() { - return {{ +constexpr std::array + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, .dstArrayElement = 0, @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .stride = sizeof(DescriptorUpdateEntry), }, }}; -} struct AstcPushConstants { std::array blocks_dims; @@ -159,14 +170,13 @@ struct AstcPushConstants { u32 block_height; u32 block_height_mask; }; - } // Anonymous namespace -VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, - std::span code) { +ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -196,8 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .pipelineLayout = *layout, .set = 0, }); - - descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); } module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -226,23 +235,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ }); } -VKComputePass::~VKComputePass() = default; +ComputePass::~ComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& update_descriptor_queue) { +VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { if (!descriptor_template) { return nullptr; } - const VkDescriptorSet set = descriptor_allocator->Commit(); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(descriptor_template.address(), set); return set; } -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), + : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, + VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -277,12 +286,12 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -337,14 +346,13 @@ std::pair QuadIndexedPass::Assemble( } ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_) - : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), - BuildASTCPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(AstcPushConstants)), - ASTC_DECODER_COMP_SPV), + : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..54c1ac4cb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include @@ -27,13 +26,14 @@ class VKUpdateDescriptorQueue; class Image; struct StagingBufferRef; -class VKComputePass { +class ComputePass { public: - explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, std::span code); - ~VKComputePass(); + explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code); + ~ComputePass(); protected: VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); @@ -44,14 +44,14 @@ protected: private: vk::DescriptorSetLayout descriptor_set_layout; - std::optional descriptor_allocator; + DescriptorAllocator descriptor_allocator; vk::ShaderModule module; }; -class Uint8Pass final : public VKComputePass { +class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -66,10 +66,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class QuadIndexedPass final : public VKComputePass { +class QuadIndexedPass final : public ComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -84,10 +84,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class ASTCDecoderPass final : public VKComputePass { +class ASTCDecoderPass final : public ComputePass { public: explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 990ead575..54a57c358 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,7 +18,7 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) @@ -30,7 +30,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_set_layout = builder.CreateDescriptorSetLayout(); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8efdc2926..0d4cd37be 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -25,7 +25,7 @@ class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* thread_worker, const Shader::Info& info, vk::ShaderModule spv_module); diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 3bea1ff44..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include #include "common/common_types.h" @@ -13,77 +15,149 @@ namespace Vulkan { -// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. -constexpr std::size_t SETS_GROW_RATE = 0x20; +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines +constexpr size_t SETS_GROW_RATE = 16; +constexpr s32 SCORE_THRESHOLD = 3; +constexpr u32 SETS_PER_POOL = 64; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, - VkDescriptorSetLayout layout_) - : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{&descriptor_pool_}, layout{layout_} {} +struct DescriptorBank { + DescriptorBankInfo info; + std::vector pools; +}; -VkDescriptorSet DescriptorAllocator::Commit() { - const std::size_t index = CommitResource(); - return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { + return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && + textures >= subset.textures && images >= subset.image_buffers; } -void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); +template +static u32 Accumulate(const Descriptors& descriptors) { + u32 count = 0; + for (const auto& descriptor : descriptors) { + count += descriptor.count; + } + return count; } -VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) - : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ - AllocateNewPool()} {} - -VKDescriptorPool::~VKDescriptorPool() = default; - -vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { - static constexpr u32 num_sets = 0x20000; - static constexpr VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, - }; +static DescriptorBankInfo MakeBankInfo(std::span infos) { + DescriptorBankInfo bank; + for (const Shader::Info& info : infos) { + bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); + bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); + bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); + bank.image_buffers += Accumulate(info.image_buffer_descriptors); + bank.textures += Accumulate(info.texture_descriptors); + bank.images += Accumulate(info.image_descriptors); + } + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; +} - const VkDescriptorPoolCreateInfo ci{ +static void AllocatePool(const Device& device, DescriptorBank& bank) { + std::array pool_sizes; + size_t pool_cursor{}; + const auto add = [&](VkDescriptorType type, u32 count) { + if (count > 0) { + pool_sizes[pool_cursor++] = { + .type = type, + .descriptorCount = count * SETS_PER_POOL, + }; + } + }; + const auto& info{bank.info}; + add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); + add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); + add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); + add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); + bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = num_sets, - .poolSizeCount = static_cast(std::size(pool_sizes)), + .maxSets = SETS_PER_POOL, + .poolSizeCount = static_cast(pool_cursor), .pPoolSizes = std::data(pool_sizes), - }; - return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); + })); } -vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, - std::size_t count) { - const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai{ +DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_) + : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, + layout{layout_} {} + +VkDescriptorSet DescriptorAllocator::Commit() { + const size_t index = CommitResource(); + return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +} + +void DescriptorAllocator::Allocate(size_t begin, size_t end) { + sets.push_back(AllocateDescriptors(end - begin)); +} + +vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { + const std::vector layouts(count, layout); + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, - .descriptorPool = **active_pool, + .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast(count), - .pSetLayouts = layout_copies.data(), + .pSetLayouts = layouts.data(), }; - - vk::DescriptorSets sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // Our current pool is out of memory. Allocate a new one and retry - active_pool = AllocateNewPool(); - ai.descriptorPool = **active_pool; - sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + AllocatePool(*device, *bank); + allocate_info.descriptorPool = *bank->pools.back(); + new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // After allocating a new pool, we are out of memory again. We can't handle this from here. throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } +DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} + +DescriptorPool::~DescriptorPool() = default; + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + std::span infos) { + return Allocator(layout, MakeBankInfo(infos)); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const Shader::Info& info) { + return Allocator(layout, MakeBankInfo(std::array{info})); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const DescriptorBankInfo& info) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout); +} + +DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + std::shared_lock read_lock{banks_mutex}; + const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { + return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); + }); + if (it != bank_infos.end()) { + return *banks[std::distance(bank_infos.begin(), it)].get(); + } + read_lock.unlock(); + + std::unique_lock write_lock{banks_mutex}; + bank_infos.push_back(reqs); + + auto& bank = *banks.emplace_back(std::make_unique()); + bank.info = reqs; + AllocatePool(device, bank); + return bank; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 2501f9967..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -4,21 +4,38 @@ #pragma once +#include +#include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; class VKScheduler; +struct DescriptorBank; + +struct DescriptorBankInfo { + [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; + + u32 uniform_buffers{}; ///< Number of uniform buffer descriptors + u32 storage_buffers{}; ///< Number of storage buffer descriptors + u32 texture_buffers{}; ///< Number of texture buffer descriptors + u32 image_buffers{}; ///< Number of image buffer descriptors + u32 textures{}; ///< Number of texture descriptors + u32 images{}; ///< Number of image descriptors + s32 score{}; ///< Number of descriptors in total +}; + class DescriptorAllocator final : public ResourcePool { + friend class DescriptorPool; + public: explicit DescriptorAllocator() = default; - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override = default; DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; @@ -29,36 +46,43 @@ public: VkDescriptorSet Commit(); -protected: - void Allocate(std::size_t begin, std::size_t end) override; - private: - VKDescriptorPool* descriptor_pool{}; + explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_); + + void Allocate(size_t begin, size_t end) override; + + vk::DescriptorSets AllocateDescriptors(size_t count); + + const Device* device{}; + DescriptorBank* bank{}; VkDescriptorSetLayout layout{}; - std::vector descriptors_allocations; + std::vector sets; }; -class VKDescriptorPool final { - friend DescriptorAllocator; - +class DescriptorPool { public: - explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); - ~VKDescriptorPool(); + explicit DescriptorPool(const Device& device, VKScheduler& scheduler); + ~DescriptorPool(); - VKDescriptorPool(const VKDescriptorPool&) = delete; - VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + DescriptorPool& operator=(const DescriptorPool&) = delete; + DescriptorPool(const DescriptorPool&) = delete; -private: - vk::DescriptorPool* AllocateNewPool(); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, + std::span infos); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); - vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); +private: + DescriptorBank& Bank(const DescriptorBankInfo& reqs); const Device& device; MasterSemaphore& master_semaphore; - std::vector pools; - vk::DescriptorPool* active_pool; + std::shared_mutex banks_mutex; + std::vector bank_infos; + std::vector> banks; }; } // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 674226cb7..0526c197a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,7 +205,7 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, @@ -220,7 +220,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index edab5703f..454fc049e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -67,7 +67,7 @@ public: explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2bd870060..9d9729022 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -647,7 +647,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ad569acc4..eec17d3fd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -75,10 +75,10 @@ namespace Vulkan { class ComputePipeline; class Device; +class DescriptorPool; class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; -class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -105,7 +105,7 @@ public: Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); @@ -147,7 +147,7 @@ private: const Device& device; VKScheduler& scheduler; - VKDescriptorPool& descriptor_pool; + DescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; RenderPassCache& render_pass_cache; BufferCache& buffer_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2f1551e65..1302bed02 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -147,7 +147,7 @@ private: VKScheduler& scheduler; StagingBufferPool staging_pool; - VKDescriptorPool descriptor_pool; + DescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; -- cgit v1.2.3 From ac8835659ead30d289ff8b907a2295d87790670f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Apr 2021 01:04:49 -0300 Subject: vulkan: Defer descriptor set work to the Vulkan thread Move descriptor lookup and update code to a separate thread. Delaying this removes work from the main GPU thread and allows creating descriptor layouts on another thread. This reduces a bit the workload of the main thread when new pipelines are encountered. --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 45 ++++++++++------------ src/video_core/renderer_vulkan/vk_compute_pass.h | 8 ++-- .../renderer_vulkan/vk_compute_pipeline.cpp | 36 +++++++++-------- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 40 +++++++++---------- .../renderer_vulkan/vk_graphics_pipeline.h | 5 ++- .../renderer_vulkan/vk_update_descriptor.cpp | 9 ----- .../renderer_vulkan/vk_update_descriptor.h | 4 +- 8 files changed, 69 insertions(+), 79 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e2f3d16bf..7e5ba283b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -172,11 +172,12 @@ struct AstcPushConstants { }; } // Anonymous namespace -ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, +ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, const DescriptorBankInfo& bank_info, - vk::Span push_constants, std::span code) { + vk::Span push_constants, std::span code) + : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { - if (!descriptor_template) { - return nullptr; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(descriptor_template.address(), set); - return set; -} - Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -265,10 +257,11 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const VkBuffer buffer{staging.buffer}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -276,6 +269,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); @@ -321,10 +316,10 @@ std::pair QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ @@ -333,7 +328,9 @@ std::pair QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - const std::array push_constants = {base_vertex, index_shift}; + const std::array push_constants{base_vertex, index_shift}; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), @@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), - device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - const VkPipelineLayout vk_layout = *layout; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); - scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, - block_dims, params, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .bytes_per_block_log2 = params.bytes_per_block_log2, @@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .block_height = params.block_height, .block_height_mask = params.block_height_mask, }; - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); - cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 54c1ac4cb..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -36,15 +36,14 @@ public: ~ComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); - + const Device& device; vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; vk::Pipeline pipeline; - -private: vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; + +private: vk::ShaderModule module; }; @@ -99,7 +98,6 @@ public: private: void MakeDataBuffer(); - const Device& device; VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 54a57c358..feaace0c5 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,21 +18,22 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutBuilder builder{device.GetLogical()}; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + auto func{[this, &descriptor_pool] { + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); - descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - scheduler.Record([this](vk::CommandBuffer cmdbuf) { + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - }); - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0d4cd37be..a560e382e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -40,6 +40,7 @@ public: VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: + const Device& device; VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0526c197a..76080bde1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, DescriptorPool& descriptor_pool, + const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + auto func{[this, &render_pass_cache, &descriptor_pool] { + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); - MakePipeline(device, render_pass); + MakePipeline(render_pass); std::lock_guard lock{build_mutex}; is_built = true; @@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() { build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - if (scheduler.UpdateGraphicsPipeline(this)) { - scheduler.Record([this](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - }); - } - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = key.state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 454fc049e..85e21f611 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -109,19 +109,20 @@ private: void ConfigureDraw(); - void MakePipeline(const Device& device, VkRenderPass render_pass); + void MakePipeline(VkRenderPass render_pass); void Validate(); const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; + const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - void (*configure_func)(GraphicsPipeline*, bool); + void (*configure_func)(GraphicsPipeline*, bool){}; std::vector transition_keys; std::vector transitions; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index bea9b8012..ce3427c9b 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, - VkDescriptorSet set) { - const void* const data = upload_start; - const vk::Device* const logical = &device.GetLogical(); - scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, *update_template, data); - }); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 82bc9920c..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,9 @@ public: void Acquire(); - void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); + const DescriptorUpdateEntry* UpdateData() const noexcept { + return upload_start; + } void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ -- cgit v1.2.3 From 025b20f96ae588777e3ff11083cc4184bf418af6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 03:53:26 -0300 Subject: shader: Move pipeline cache logic to separate files Move code to separate files to be able to reuse it from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted and it's still intrusively stored inside vk_pipeline_cache. It will be moved when needed on OpenGL. --- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 719 +++------------------ src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 4 files changed, 104 insertions(+), 649 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85e21f611..e362d13c5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -23,7 +23,7 @@ namespace Vulkan { struct GraphicsPipelineCacheKey { - std::array unique_hashes; + std::array unique_hashes; FixedPipelineState state; size_t Hash() const noexcept; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9d9729022..0822862fe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,7 +11,8 @@ #include "common/bit_cast.h" #include "common/cityhash.h" -#include "common/file_util.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" #include "common/thread_worker.h" #include "core/core.h" @@ -36,6 +37,7 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -43,449 +45,19 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -template -auto MakeSpan(Container& container) { - return std::span(container.data(), container.size()); -} - -static u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | offset; -} - -class GenericEnvironment : public Shader::Environment { -public: - explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} { - start_address = start_address_; - } - - ~GenericEnvironment() override = default; - - u32 TextureBoundBuffer() const final { - return texture_bound; - } - - u32 LocalMemorySize() const final { - return local_memory_size; - } - - u32 SharedMemorySize() const final { - return shared_memory_size; - } - - std::array WorkgroupSize() const final { - return workgroup_size; - } - - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - - std::optional Analyze() { - const std::optional size{TryFindSize()}; - if (!size) { - return std::nullopt; - } - cached_lowest = start_address; - cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), *size); - } - - void SetCachedSize(size_t size_bytes) { - cached_lowest = start_address; - cached_highest = start_address + static_cast(size_bytes); - code.resize(CachedSize()); - gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); - } - - [[nodiscard]] size_t CachedSize() const noexcept { - return cached_highest - cached_lowest + INST_SIZE; - } - - [[nodiscard]] size_t ReadSize() const noexcept { - return read_highest - read_lowest + INST_SIZE; - } - - [[nodiscard]] bool CanBeSerialized() const noexcept { - return !has_unbound_instructions; - } - - [[nodiscard]] u128 CalculateHash() const { - const size_t size{ReadSize()}; - const auto data{std::make_unique(size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(data.get(), size); - } - - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(CachedSize())}; - const u64 num_texture_types{static_cast(texture_types.size())}; - const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - - file.write(reinterpret_cast(&code_size), sizeof(code_size)) - .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) - .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) - .write(reinterpret_cast(&stage), sizeof(stage)) - .write(reinterpret_cast(code.data()), code_size); - for (const auto [key, type] : texture_types) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - for (const auto [key, type] : cbuf_values) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - if (stage == Shader::Stage::Compute) { - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .write(reinterpret_cast(&shared_memory_size), - sizeof(shared_memory_size)); - } else { - file.write(reinterpret_cast(&sph), sizeof(sph)); - } - } - -protected: - static constexpr size_t INST_SIZE = sizeof(u64); - - std::optional TryFindSize() { - constexpr size_t BLOCK_SIZE = 0x1000; - constexpr size_t MAXIMUM_SIZE = 0x100000; - - constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - - GPUVAddr guest_addr{program_base + start_address}; - size_t offset{0}; - size_t size{BLOCK_SIZE}; - while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); - u64* const data = code.data() + offset / INST_SIZE; - gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { - const u64 inst = data[index / INST_SIZE]; - if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + index; - } - } - guest_addr += BLOCK_SIZE; - size += BLOCK_SIZE; - offset += BLOCK_SIZE; - } - return std::nullopt; - } - - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; - Tegra::Texture::TICEntry entry; - gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - - const Shader::TextureType result{[&] { - switch (entry.texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return Shader::TextureType::Color1D; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return Shader::TextureType::Color2D; - case Tegra::Texture::TextureType::Texture3D: - return Shader::TextureType::Color3D; - case Tegra::Texture::TextureType::TextureCubemap: - return Shader::TextureType::ColorCube; - case Tegra::Texture::TextureType::Texture1DArray: - return Shader::TextureType::ColorArray1D; - case Tegra::Texture::TextureType::Texture2DArray: - return Shader::TextureType::ColorArray2D; - case Tegra::Texture::TextureType::Texture1DBuffer: - return Shader::TextureType::Buffer; - case Tegra::Texture::TextureType::TextureCubeArray: - return Shader::TextureType::ColorArrayCube; - default: - throw Shader::NotImplementedException("Unknown texture type"); - } - }()}; - texture_types.emplace(raw, result); - return result; - } - - Tegra::MemoryManager* gpu_memory{}; - GPUVAddr program_base{}; - - std::vector code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - - u32 local_memory_size{}; - u32 texture_bound{}; - u32 shared_memory_size{}; - std::array workgroup_size{}; - - u32 read_lowest = std::numeric_limits::max(); - u32 read_highest = 0; - - u32 cached_lowest = std::numeric_limits::max(); - u32 cached_highest = 0; - - bool has_unbound_instructions = false; -}; - namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; -// TODO: Move this to a separate file -constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{2}; - -class GraphicsEnvironment final : public GenericEnvironment { -public: - explicit GraphicsEnvironment() = default; - explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); - switch (program) { - case Maxwell::ShaderProgram::VertexA: - stage = Shader::Stage::VertexA; - stage_index = 0; - break; - case Maxwell::ShaderProgram::VertexB: - stage = Shader::Stage::VertexB; - stage_index = 0; - break; - case Maxwell::ShaderProgram::TesselationControl: - stage = Shader::Stage::TessellationControl; - stage_index = 1; - break; - case Maxwell::ShaderProgram::TesselationEval: - stage = Shader::Stage::TessellationEval; - stage_index = 2; - break; - case Maxwell::ShaderProgram::Geometry: - stage = Shader::Stage::Geometry; - stage_index = 3; - break; - case Maxwell::ShaderProgram::Fragment: - stage = Shader::Stage::Fragment; - stage_index = 4; - break; - default: - UNREACHABLE_MSG("Invalid program={}", program); - break; - } - const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); - texture_bound = maxwell3d->regs.tex_cb_index; - } - - ~GraphicsEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.address + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{maxwell3d->regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); - } - -private: - Tegra::Engines::Maxwell3D* maxwell3d{}; - size_t stage_index{}; -}; - -class ComputeEnvironment final : public GenericEnvironment { -public: - explicit ComputeEnvironment() = default; - explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ - &kepler_compute_} { - const auto& qmd{kepler_compute->launch_description}; - stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; - texture_bound = kepler_compute->regs.tex_cb_index; - shared_memory_size = qmd.shared_alloc; - workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - - ~ComputeEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.Address() + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{kepler_compute->regs}; - const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); - } - -private: - Tegra::Engines::KeplerCompute* kepler_compute{}; -}; - -void SerializePipeline(std::span key, std::span envs, - std::ofstream& file) { - if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { - return; - } - const u32 num_envs{static_cast(envs.size())}; - file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); - for (const GenericEnvironment* const env : envs) { - env->Serialize(file); - } - file.write(key.data(), key.size_bytes()); -} - -template -void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { - try { - std::ofstream file; - file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); - if (!file.is_open()) { - LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); - return; - } - if (file.tellp() == 0) { - file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) - .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); - } - const std::span key_span(reinterpret_cast(&key), sizeof(key)); - SerializePipeline(key_span, MakeSpan(envs), file); - - } catch (const std::ios_base::failure& e) { - LOG_ERROR(Common_Filesystem, "{}", e.what()); - if (!Common::FS::Delete(filename)) { - LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); - } - } +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -class FileEnvironment final : public Shader::Environment { -public: - void Deserialize(std::ifstream& file) { - u64 code_size{}; - u64 num_texture_types{}; - u64 num_cbuf_values{}; - file.read(reinterpret_cast(&code_size), sizeof(code_size)) - .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .read(reinterpret_cast(&start_address), sizeof(start_address)) - .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .read(reinterpret_cast(&read_highest), sizeof(read_highest)) - .read(reinterpret_cast(&stage), sizeof(stage)); - code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); - file.read(reinterpret_cast(code.get()), code_size); - for (size_t i = 0; i < num_texture_types; ++i) { - u32 key; - Shader::TextureType type; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&type), sizeof(type)); - texture_types.emplace(key, type); - } - for (size_t i = 0; i < num_cbuf_values; ++i) { - u64 key; - u32 value; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&value), sizeof(value)); - cbuf_values.emplace(key, value); - } - if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); - } else { - file.read(reinterpret_cast(&sph), sizeof(sph)); - } - } - - u64 ReadInstruction(u32 address) override { - if (address < read_lowest || address > read_highest) { - throw Shader::LogicError("Out of bounds address {}", address); - } - return code[(address - read_lowest) / sizeof(u64)]; - } - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; - if (it == cbuf_values.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto it{texture_types.find(handle)}; - if (it == texture_types.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - u32 LocalMemorySize() const override { - return local_memory_size; - } - - u32 SharedMemorySize() const override { - return shared_memory_size; - } - - u32 TextureBoundBuffer() const override { - return texture_bound; - } - - std::array WorkgroupSize() const override { - return workgroup_size; - } - -private: - std::unique_ptr code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - std::array workgroup_size{}; - u32 local_memory_size{}; - u32 shared_memory_size{}; - u32 texture_bound{}; - u32 read_lowest{}; - u32 read_highest{}; -}; - Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso } } // Anonymous namespace -void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - if (title_id == 0) { - return; - } - std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; - std::string base_dir{shader_dir + "/vulkan"}; - std::string transferable_dir{base_dir + "/transferable"}; - std::string precompiled_dir{base_dir + "/precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); - return; - } - pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - - struct { - std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; - } state; - - std::ifstream file; - Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); - if (!file.is_open()) { - return; - } - file.exceptions(std::ifstream::failbit); - const auto end{file.tellg()}; - file.seekg(0, std::ios::beg); - - std::array magic_number; - u32 cache_version; - file.read(magic_number.data(), magic_number.size()) - .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { - file.close(); - if (Common::FS::Delete(pipeline_cache_filename)) { - if (magic_number != MAGIC_NUMBER) { - LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); - } - if (cache_version != CACHE_VERSION) { - LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); - } - } else { - LOG_ERROR(Render_Vulkan, - "Invalid pipeline cache file and failed to delete it in \"{}\"", - pipeline_cache_filename); - } - return; - } - while (file.tellg() != end) { - if (stop_loading) { - return; - } - u32 num_envs{}; - file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - std::vector envs(num_envs); - for (FileEnvironment& env : envs) { - env.Deserialize(file); - } - if (envs.front().ShaderStage() == Shader::Stage::Compute) { - ComputePipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } else { - GraphicsPipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - boost::container::static_vector env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } - ++state.total; - } - { - std::lock_guard lock{state.mutex}; - callback(VideoCore::LoadCallbackStage::Build, 0, state.total); - state.has_loaded = true; - } - workers.WaitForRequests(); -} - size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (!RefreshStages()) { + if (!RefreshStages(graphics_key.unique_hashes)) { current_pipeline = nullptr; return nullptr; } @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - const GPUVAddr shader_addr{program_base + qmd.program_start}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - return nullptr; - } - const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* const shader{ComputeShader()}; if (!shader) { - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; - shader = MakeShaderInfo(env, *cpu_shader_addr); + return nullptr; } + const auto& qmd{kepler_compute.launch_description}; const ComputePipelineCacheKey key{ - .unique_hash{shader->unique_hash}, - .shared_memory_size{qmd.shared_alloc}, + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return pipeline.get(); } -bool PipelineCache::RefreshStages() { - auto& dirty{maxwell3d.dirty.flags}; - if (!dirty[VideoCommon::Dirty::Shaders]) { - return last_valid_shaders; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - dirty[VideoCommon::Dirty::Shaders] = false; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - graphics_key.unique_hashes[index] = u128{}; - continue; - } - const auto& shader_config{maxwell3d.regs.shader_config[index]}; - const auto program{static_cast(index)}; - const GPUVAddr shader_addr{base_addr + shader_config.offset}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); - last_valid_shaders = false; - return false; - } - const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; - if (!shader_info) { - const u32 start_address{shader_config.offset}; - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; - shader_info = MakeShaderInfo(env, *cpu_shader_addr); - } - shader_infos[index] = shader_info; - graphics_key.unique_hashes[index] = shader_info->unique_hash; + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "vulkan"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; } - last_valid_shaders = true; - return true; -} + pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); -const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { - auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze()}) { - info->unique_hash = *cached_hash; - info->size_bytes = env.CachedSize(); - } else { - // Slow path, not really hit on commercial games - // Build a control flow graph to get the real shader size - main_pools.flow_block.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; - info->unique_hash = env.CalculateHash(); - info->size_bytes = env.ReadSize(); - } - const size_t size_bytes{info->size_bytes}; - const ShaderInfo* const result{info.get()}; - Register(std::move(info), cpu_addr, size_bytes); - return result; + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); } std::unique_ptr PipelineCache::CreateGraphicsPipeline( @@ -815,7 +288,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( size_t env_index{0}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } Shader::Environment& env{*envs[env_index]}; @@ -830,7 +303,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( u32 binding{0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } UNIMPLEMENTED_IF(index == 0); @@ -844,8 +317,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], - key.unique_hashes[index][1])}; + const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } @@ -863,7 +335,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (graphics_key.unique_hashes[index] == 0) { continue; } const auto program{static_cast(index)}; @@ -871,7 +343,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; @@ -882,11 +353,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] != u128{}) { + if (key.unique_hashes[index] != 0) { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -902,8 +373,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + VideoCommon::SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); }); } return pipeline; @@ -921,7 +392,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + const auto name{fmt::format("{:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - u128 unique_hash; + u64 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -76,16 +77,12 @@ namespace Vulkan { class ComputePipeline; class Device; class DescriptorPool; -class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -struct ShaderInfo { - u128 unique_hash{}; - size_t size_bytes{}; -}; +using VideoCommon::ShaderInfo; struct ShaderPools { void ReleaseContents() { @@ -99,17 +96,16 @@ struct ShaderPools { Shader::ObjectPool flow_block; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); - ~PipelineCache() override; + ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -119,10 +115,6 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: - bool RefreshStages(); - - const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,11 +132,6 @@ private: Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -156,16 +143,13 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - std::array shader_infos{}; - bool last_valid_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; - std::string pipeline_cache_filename; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, -- cgit v1.2.3 From c5425b38c1a4d7eae270780d8b3ba66231015038 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 05:18:26 -0300 Subject: vk_compute_pass: Fix -Wshadow warning --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7e5ba283b..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -238,10 +238,10 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, - StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, + DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, -- cgit v1.2.3 From 53acdda772a8b7650c46ba9d998119b8c8e30844 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 22:11:31 -0300 Subject: vk_scheduler: Allow command submission on worker thread This changes how Scheduler::Flush works. It queues the current command buffer to be sent to the GPU but does not do it immediately. The Vulkan worker thread takes care of that. Users will have to use Scheduler::Flush + Scheduler::WaitWorker to get the previous behavior. Scheduler::Finish is unchanged. To avoid waiting on work never queued, Scheduler::Wait sends the current command buffer if that's what the caller wants to wait. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 75 +++++++------ src/video_core/renderer_vulkan/vk_blit_screen.cpp | 94 ++++++++-------- src/video_core/renderer_vulkan/vk_query_cache.cpp | 9 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +- src/video_core/renderer_vulkan/vk_scheduler.cpp | 120 +++++++++++---------- src/video_core/renderer_vulkan/vk_scheduler.h | 15 +++ src/video_core/renderer_vulkan/vk_swapchain.cpp | 39 +++---- src/video_core/renderer_vulkan/vk_swapchain.h | 23 ++-- 8 files changed, 200 insertions(+), 182 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..7e39b65bd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -130,35 +125,47 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); - if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { - const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); - const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - - scheduler.WaitWorker(); - - while (!swapchain.AcquireNextImage()) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { + return; + } + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + bool has_been_recreated = false; + const auto recreate_swapchain = [&] { + if (!has_been_recreated) { + has_been_recreated = true; + scheduler.WaitWorker(); } - const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - - scheduler.Flush(render_semaphore); - - if (swapchain.Present(render_semaphore)) { - blit_screen.Recreate(); + swapchain.Create(layout.width, layout.height, is_srgb); + }; + if (swapchain.NeedsRecreate() || + swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + recreate_swapchain(); + } + bool needs_recreate; + do { + needs_recreate = false; + swapchain.AcquireNextImage(); + if (swapchain.NeedsRecreate()) { + recreate_swapchain(); + needs_recreate = true; } - gpu.RendererFrameEndNotify(); - rasterizer.TickFrame(); + } while (needs_recreate); + if (has_been_recreated) { + blit_screen.Recreate(); } + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); + scheduler.Flush(render_semaphore); + scheduler.WaitWorker(); + swapchain.Present(render_semaphore); - render_window.OnFrameDisplayed(); + gpu.RendererFrameEndNotify(); + rasterizer.TickFrame(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .depth = 1, }, }; - scheduler.Record( - [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); - }); + scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[image_index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + }); } - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], - descriptor_set = descriptor_sets[image_index], buffer = *buffer, - size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, + .renderPass = *renderpass, + .framebuffer = *framebuffers[image_index], .renderArea = { .offset = {0, 0}, @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .extent = size, }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { void VKBlitScreen::CreateSemaphores() { semaphores.resize(image_count); - std::generate(semaphores.begin(), semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { } void VKBlitScreen::ReleaseRawImages() { - for (std::size_t i = 0; i < raw_images.size(); ++i) { - scheduler.Wait(resource_ticks.at(i)); + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..1dd78328c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,10 +114,13 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.GetScheduler().CurrentTick()) { - cache.GetScheduler().Flush(); + auto& scheduler{cache.GetScheduler()}; + if (tick >= scheduler.CurrentTick()) { + scheduler.Flush(); + // This may not be necessary, but it's better to play it safe and assume drivers don't + // support wait before signal on vkGetQueryPoolResults + scheduler.WaitWorker(); } - u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa6daeb3a..0f15ad2f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -452,10 +452,11 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - if (draw_counter > 0) { - draw_counter = 0; - scheduler.Flush(); + if (draw_counter == 0) { + return; } + draw_counter = 0; + scheduler.Flush(); } void RasterizerVulkan::TickFrame() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 25a4933e5..81cb330d9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { command->~Command(); command = next; } - + submit = false; command_offset = 0; first = nullptr; last = nullptr; @@ -42,7 +42,7 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { AcquireNewChunk(); - AllocateNewContext(); + AllocateWorkerCommandBuffer(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); } @@ -60,6 +60,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { void VKScheduler::Finish(VkSemaphore semaphore) { const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); + WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -140,75 +141,82 @@ void VKScheduler::WorkerThread() { if (quit) { continue; } - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf); - chunk_reserve.Push(std::move(extracted_chunk)); + while (!chunk_queue.Empty()) { + auto extracted_chunk = std::move(chunk_queue.Front()); + chunk_queue.Pop(); + const bool has_submit = extracted_chunk->HasSubmit(); + extracted_chunk->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); + } + chunk_reserve.Push(std::move(extracted_chunk)); + } } while (!quit); } +void VKScheduler::AllocateWorkerCommandBuffer() { + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); +} + void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - WaitWorker(); - - std::unique_lock lock{mutex}; - - current_cmdbuf.End(); - - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - const u32 num_signal_semaphores = semaphore ? 2U : 1U; const u64 signal_value = master_semaphore->CurrentTick(); - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - master_semaphore->NextTick(); - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + cmdbuf.End(); - const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, - .commandBufferCount = 1, - .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { - case VK_SUCCESS: - break; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - vk::Check(result); - } -} + const u32 num_signal_semaphores = semaphore ? 2U : 1U; -void VKScheduler::AllocateNewContext() { - std::unique_lock lock{mutex}; + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); - current_cmdbuf.Begin({ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; + + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } }); + chunk->MarkSubmit(); + DispatchWork(); +} +void VKScheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { query_cache->UpdateCounters(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index a40bb8bcd..40215c4c5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -86,6 +86,10 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } master_semaphore->Wait(tick); } @@ -155,15 +159,24 @@ private: return true; } + void MarkSubmit() { + submit = true; + } + bool Empty() const { return command_offset == 0; } + bool HasSubmit() const { + return submit; + } + private: Command* first = nullptr; Command* last = nullptr; size_t command_offset = 0; + bool submit = false; alignas(std::max_align_t) std::array data{}; }; @@ -176,6 +189,8 @@ private: void WorkerThread(); + void AllocateWorkerCommandBuffer(); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..a71b0b01e 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,6 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { + needs_recreate = false; + const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { @@ -82,21 +84,20 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -bool VKSwapchain::AcquireNextImage() { +void VKSwapchain::AcquireNextImage() { const VkResult result = device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], {}, &image_index); + needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; scheduler.Wait(resource_ticks[image_index]); - return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; + resource_ticks[image_index] = scheduler.CurrentTick(); } -bool VKSwapchain::Present(VkSemaphore render_semaphore) { +void VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; - bool recreated = false; - const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, @@ -107,7 +108,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; - switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -115,24 +115,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - if (current_width > 0 && current_height > 0) { - Create(current_width, current_height, current_srgb); - recreated = true; - } + needs_recreate = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); break; } - - resource_ticks[image_index] = scheduler.CurrentTick(); - frame_index = (frame_index + 1) % static_cast(image_count); - return recreated; -} - -bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { - // TODO(Rodrigo): Handle framebuffer pixel format changes - return framebuffer.width != current_width || framebuffer.height != current_height; + ++frame_index; + if (frame_index >= image_count) { + frame_index = 0; + } } void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, @@ -148,7 +140,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci{ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, @@ -169,7 +160,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, .clipped = VK_FALSE, .oldSwapchain = nullptr, }; - const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array queue_indices{graphics_family, present_family}; @@ -178,7 +168,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } - // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -186,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; - current_width = extent.width; - current_height = extent.height; current_srgb = srgb; images = swapchain.GetImages(); @@ -197,8 +184,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, void VKSwapchain::CreateSemaphores() { present_semaphores.resize(image_count); - std::generate(present_semaphores.begin(), present_semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(present_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..b38fd9dc2 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,14 +28,20 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - bool AcquireNextImage(); + void AcquireNextImage(); - /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be - /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore); + /// Presents the rendered image to the swapchain. + void Present(VkSemaphore render_semaphore); /// Returns true when the framebuffer layout has changed. - bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { + return extent.width != width || extent.height != height || current_srgb != is_srgb; + } + + /// Returns true when the image has to be recreated. + bool NeedsRecreate() const { + return needs_recreate; + } VkExtent2D GetSize() const { return extent; @@ -61,10 +67,6 @@ public: return image_format; } - bool GetSrgbState() const { - return current_srgb; - } - private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); @@ -92,9 +94,8 @@ private: VkFormat image_format{}; VkExtent2D extent{}; - u32 current_width{}; - u32 current_height{}; bool current_srgb{}; + bool needs_recreate{}; }; } // namespace Vulkan -- cgit v1.2.3 From f4b82b8dd70a57b5a828bcdbecf9aefd1bd240b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 1 May 2021 00:29:31 -0300 Subject: vk_graphics_pipeline: Fix texture buffer descriptors --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 76080bde1..9f5d30fe8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -253,6 +253,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; + size_t sampler_index{}; size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -312,11 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index] = handle.image; + image_view_indices[image_index++] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers[image_index] = sampler->Handle(); - ++image_index; + samplers[sampler_index++] = sampler->Handle(); } } if constexpr (Spec::has_images) { @@ -360,10 +360,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++texture_buffer_index; } }}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + const Shader::Info& info{stage_infos[stage]}; - if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { - buffer_cache.UnbindGraphicsTextureBuffers(stage); - } if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { add_buffer(desc); @@ -443,7 +442,9 @@ void GraphicsPipeline::ConfigureDraw() { const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } if (!descriptor_set_layout) { return; } -- cgit v1.2.3 From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: shader: Implement VertexA stage --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0822862fe..638475251 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -287,22 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; + bool uses_vertex_a{}; + std::size_t start_value_processing{}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } + uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + continue; + } + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 638475251..634bbb450 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -288,32 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; - bool uses_vertex_a{}; - std::size_t start_value_processing{}; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } - uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { + // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - continue; + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; - programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); - start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From 48a17298d76cd8ed3bf2b53aca1e1ac097693976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:58:11 -0300 Subject: spirv: Support OpenGL uniform buffers and change bindings --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 634bbb450..1334882b5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -127,6 +127,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw base_profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, + .support_descriptor_aliasing = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == @@ -149,9 +150,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device.IsExtShaderViewportIndexLayerSupported(), .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), + .support_demote_to_helper_invocation = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .has_broken_unsigned_image_offsets = false, .generic_input_types{}, .fixed_state_point_size{}, .alpha_test_func{}, @@ -312,7 +315,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -398,7 +401,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; const std::vector code{EmitSPIRV(base_profile, program, binding)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; -- cgit v1.2.3 From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/video_core/renderer_vulkan/pipeline_helper.h | 17 ---------------- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 ++++++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +++++++++++---------- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +++++----------------- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 ----------- 6 files changed, 31 insertions(+), 66 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index dd7d2cc0c..c6e5e059b 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -19,23 +19,6 @@ namespace Vulkan { -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - [[likely]] if (via_header_index) { - image = data; - sampler = data; - } - else { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - } - - u32 image; - u32 sampler; -}; - class DescriptorLayoutBuilder { public: DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index c52001b5a..c27402ff0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -140,8 +140,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; ++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { - main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; if (pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f7..ef14e91e7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ -- cgit v1.2.3 From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: Move SPIR-V emission functions to their own header --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30b71bdbc..a5edcd072 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - Shader::Backend::SPIRV::Bindings binding; - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { -- cgit v1.2.3 From a51503660435f1279ce0fa449f9cf76e74b45d74 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:08 -0300 Subject: vk_master_semaphore: Use fetch_add to increase master semaphore tick --- src/video_core/renderer_vulkan/vk_master_semaphore.h | 6 +++--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -39,9 +39,9 @@ public: return KnownGpuTick() >= tick; } - /// Advance to the logical tick. - void NextTick() noexcept { - ++current_tick; + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order::relaxed); } /// Refresh the known GPU tick diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 81cb330d9..fcb6a5911 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -168,9 +168,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - const u64 signal_value = master_semaphore->CurrentTick(); - master_semaphore->NextTick(); - + const u64 signal_value = master_semaphore->NextTick(); Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); -- cgit v1.2.3 From 56c47951c5d92d5e6145060469528301c67e0754 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:37 -0300 Subject: vk_query_cache: Wait before reading queries --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 1dd78328c..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,17 +114,10 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - auto& scheduler{cache.GetScheduler()}; - if (tick >= scheduler.CurrentTick()) { - scheduler.Flush(); - // This may not be necessary, but it's better to play it safe and assume drivers don't - // support wait before signal on vkGetQueryPoolResults - scheduler.WaitWorker(); - } + cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), &data, sizeof(data), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); switch (query_result) { case VK_SUCCESS: -- cgit v1.2.3 From 36f158626726f940d9dba22a2b03ebbb5aa41c5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 06:26:12 -0300 Subject: vk_scheduler: Use locks instead of SPSC a queue This tries to fix a data race where we'd wait forever for the GPU. --- src/video_core/renderer_vulkan/vk_scheduler.cpp | 58 ++++++++++++++----------- src/video_core/renderer_vulkan/vk_scheduler.h | 16 ++++--- 2 files changed, 42 insertions(+), 32 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fcb6a5911..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -47,8 +47,11 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) } VKScheduler::~VKScheduler() { - quit = true; - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + quit = true; + } + work_cv.notify_all(); worker_thread.join(); } @@ -69,20 +72,19 @@ void VKScheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - bool finished = false; - do { - cv.notify_all(); - std::unique_lock lock{mutex}; - finished = chunk_queue.Empty(); - } while (!finished); + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); } void VKScheduler::DispatchWork() { if (chunk->Empty()) { return; } - chunk_queue.Push(std::move(chunk)); - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + work_cv.notify_one(); AcquireNewChunk(); } @@ -135,22 +137,27 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { void VKScheduler::WorkerThread() { Common::SetCurrentThreadName("yuzu:VulkanWorker"); - std::unique_lock lock{mutex}; do { - cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); - if (quit) { - continue; + if (work_queue.empty()) { + wait_cv.notify_all(); } - while (!chunk_queue.Empty()) { - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - const bool has_submit = extracted_chunk->HasSubmit(); - extracted_chunk->ExecuteAll(current_cmdbuf); - if (has_submit) { - AllocateWorkerCommandBuffer(); + std::unique_ptr work; + { + std::unique_lock lock{work_mutex}; + work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); + if (quit) { + continue; } - chunk_reserve.Push(std::move(extracted_chunk)); + work = std::move(work_queue.front()); + work_queue.pop(); + } + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); } + std::lock_guard reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); } while (!quit); } @@ -269,12 +276,13 @@ void VKScheduler::EndRenderPass() { } void VKScheduler::AcquireNewChunk() { - if (chunk_reserve.Empty()) { + std::lock_guard lock{reserve_mutex}; + if (chunk_reserve.empty()) { chunk = std::make_unique(); return; } - chunk = std::move(chunk_reserve.Front()); - chunk_reserve.Pop(); + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 40215c4c5..6600fb142 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,14 +6,14 @@ #include #include +#include #include #include -#include #include #include + #include "common/alignment.h" #include "common/common_types.h" -#include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -220,11 +220,13 @@ private: std::array renderpass_images{}; std::array renderpass_image_ranges{}; - Common::SPSCQueue> chunk_queue; - Common::SPSCQueue> chunk_reserve; - std::mutex mutex; - std::condition_variable cv; - bool quit = false; + std::queue> work_queue; + std::vector> chunk_reserve; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable work_cv; + std::condition_variable wait_cv; + std::atomic_bool quit{}; }; } // namespace Vulkan -- cgit v1.2.3 From 01e18581b9bdba021e6a4ae8fa0f0987ceea3e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 03:09:55 -0300 Subject: vk_pipeline_cache: Enable int8 and int16 types on Vulkan --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5edcd072..7830c0194 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -128,6 +128,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, + .support_int8 = true, + .support_int16 = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From 568d813eeadfc7888bad72d4f5d695c9d745cfe5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:43:19 -0300 Subject: vk_update_descriptor: Properly initialize payload on the update descriptor queue --- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index ce3427c9b..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -15,7 +15,9 @@ namespace Vulkan { VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) - : device{device_}, scheduler{scheduler_} {} + : device{device_}, scheduler{scheduler_} { + payload_cursor = payload.data(); +} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; -- cgit v1.2.3 From 9e7b6622c25aa858b96bf0f1c7f94223a2f449a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:12:32 -0300 Subject: shader: Split profile and runtime information in separate structs --- .../renderer_vulkan/vk_pipeline_cache.cpp | 418 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 5 +- 2 files changed, 208 insertions(+), 215 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7830c0194..88db10b75 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -89,6 +89,208 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } + +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program) { + Shader::RuntimeInfo info; + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + } + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; +} } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -124,7 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - base_profile = Shader::Profile{ + profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, @@ -153,14 +355,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, - .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, - .generic_input_types{}, - .fixed_state_point_size{}, - .alpha_test_func{}, - .xfb_varyings{}, }; } @@ -329,8 +527,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program)}; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { @@ -391,7 +589,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(base_profile, program)}; + const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { @@ -403,206 +601,4 @@ std::unique_ptr PipelineCache::CreateComputePipeline( thread_worker, program.info, std::move(spv_module)); } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { - if (attr.enabled == 0) { - return Shader::AttributeType::Disabled; - } - switch (attr.Type()) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return Shader::AttributeType::Float; - case Maxwell::VertexAttribute::Type::SignedInt: - return Shader::AttributeType::SignedInt; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return Shader::AttributeType::UnsignedInt; - } - return Shader::AttributeType::Float; -} - -static std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - -Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { - Shader::Profile profile{base_profile}; - - const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; - const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; - const float point_size{Common::BitCast(key.state.point_size)}; - switch (stage) { - case Shader::Stage::VertexB: - if (!has_geometry) { - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - } - std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), - &CastAttributeType); - break; - case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... - profile.tess_clockwise = key.state.tessellation_clockwise == 0; - profile.tess_primitive = [&key] { - const u32 raw{key.state.tessellation_primitive.Value()}; - switch (static_cast(raw)) { - case Maxwell::TessellationPrimitive::Isolines: - return Shader::TessPrimitive::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return Shader::TessPrimitive::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return Shader::TessPrimitive::Quads; - } - UNREACHABLE(); - return Shader::TessPrimitive::Triangles; - }(); - profile.tess_spacing = [&] { - const u32 raw{key.state.tessellation_spacing}; - switch (static_cast(raw)) { - case Maxwell::TessellationSpacing::Equal: - return Shader::TessSpacing::Equal; - case Maxwell::TessellationSpacing::FractionalOdd: - return Shader::TessSpacing::FractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return Shader::TessSpacing::FractionalEven; - } - UNREACHABLE(); - return Shader::TessSpacing::Equal; - }(); - break; - case Shader::Stage::Geometry: - if (program.output_topology == Shader::OutputTopology::PointList) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - break; - case Shader::Stage::Fragment: - profile.alpha_test_func = MaxwellToCompareFunction( - key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); - profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); - break; - default: - break; - } - switch (key.state.topology) { - case Maxwell::PrimitiveTopology::Points: - profile.input_topology = Shader::InputTopology::Points; - break; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - profile.input_topology = Shader::InputTopology::Lines; - break; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - case Maxwell::PrimitiveTopology::Quads: - case Maxwell::PrimitiveTopology::QuadStrip: - case Maxwell::PrimitiveTopology::Polygon: - case Maxwell::PrimitiveTopology::Patches: - profile.input_topology = Shader::InputTopology::Triangles; - break; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - profile.input_topology = Shader::InputTopology::LinesAdjacency; - break; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - profile.input_topology = Shader::InputTopology::TrianglesAdjacency; - break; - } - profile.force_early_z = key.state.early_z != 0; - profile.y_negate = key.state.y_negate != 0; - return profile; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4e48b4956..4116cc73f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -129,9 +129,6 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program); - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -148,7 +145,7 @@ private: ShaderPools main_pools; - Shader::Profile base_profile; + Shader::Profile profile; std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; -- cgit v1.2.3 From 69b910e9e7c2b9c361f4389cb1d136105b991bc0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 17:19:35 -0300 Subject: video_core: Abstract transform feedback translation utility --- .../renderer_vulkan/fixed_pipeline_state.cpp | 25 +++---- .../renderer_vulkan/fixed_pipeline_state.h | 15 +--- .../renderer_vulkan/vk_pipeline_cache.cpp | 86 +--------------------- 3 files changed, 15 insertions(+), 111 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 24834e0f7..3a43c329f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -15,9 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { - namespace { - constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; @@ -39,6 +37,16 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POLYGON, // Patches }; +void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, @@ -121,7 +129,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, dynamic_state.Refresh(regs); } if (xfb_enabled != 0) { - xfb_state.Refresh(regs); + RefreshXfbState(xfb_state, regs); } } @@ -164,17 +172,6 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } -void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { - std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { - return Layout{ - .stream = layout.stream, - .varying_count = layout.varying_count, - .stride = layout.stride, - }; - }); - varyings = regs.tfb_varying_locs; -} - void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31de6b2c8..0f1eff9cd 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -12,6 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/transform_feedback.h" namespace Vulkan { @@ -130,18 +131,6 @@ struct FixedPipelineState { } }; - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - - void Refresh(const Maxwell& regs); - }; - struct DynamicState { union { u32 raw1; @@ -213,7 +202,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 88db10b75..f86bf9c30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,88 +109,6 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } -std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -206,7 +124,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = MakeTransformFeedbackVaryings(key); + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } @@ -248,7 +166,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = MakeTransformFeedbackVaryings(key); + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; break; -- cgit v1.2.3 From a49532c8eb29807814214ab326ff970f5a964a03 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:52 -0300 Subject: video_core,shader: Clang-format fixes --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7e39b65bd..d50647ba7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6600fb142..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,11 +6,11 @@ #include #include -#include #include #include #include #include +#include #include "common/alignment.h" #include "common/common_types.h" -- cgit v1.2.3 From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:51:00 -0300 Subject: shader: Handle host exceptions --- .../renderer_vulkan/vk_pipeline_cache.cpp | 35 +++++++++++++++------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f86bf9c30..b6998e37c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -303,6 +303,9 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } if (current_pipeline) { current_pipeline->AddTransition(pipeline.get()); } @@ -362,9 +365,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, env, false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -405,7 +409,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs, bool build_in_parallel) { + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -458,6 +462,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { @@ -466,7 +474,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (pipeline_cache_filename.empty()) { + if (!pipeline || pipeline_cache_filename.empty()) { return pipeline; } serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { @@ -477,7 +485,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -491,18 +499,19 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!pipeline_cache_filename.empty()) { - serialization_thread.QueueWork([this, key, env = std::move(env)] { - VideoCommon::SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); - }); + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); return pipeline; } std::unique_ptr PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, - bool build_in_parallel) { + bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -517,6 +526,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, thread_worker, program.info, std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan -- cgit v1.2.3 From a7e9756671be5bb99566277709e5becdea774f34 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 02:57:42 -0300 Subject: buffer_cache: Mark uniform buffers as dirty if any enable bit changes --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e5f54a84f..dfe6e6a80 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,6 +218,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { + return info ? info->constant_buffer_mask : 0; + }); auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -259,11 +262,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e362d13c5..4068a0edc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -128,7 +128,10 @@ private: std::vector transitions; std::array spv_modules; + std::array stage_infos; + std::array enabled_uniform_buffers{}; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; vk::PipelineLayout pipeline_layout; -- cgit v1.2.3 From 916ca7432474e891864524dcbc6c879d5cdbfb72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 03:40:19 -0300 Subject: opengl: Declare fragment outputs even if they are not used Fixes Ori and the Blind Forest's menu on GLASM. For some reason (probably high level optimizations) it is not sanitized on SPIR-V for OpenGL. Vulkan is unaffected by this change. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b6998e37c..cec51cc77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,9 +274,16 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .ignore_nan_fp_comparisons = false, }; } -- cgit v1.2.3 From c44b16124fcfb64b9482d639ae55670005eb6307 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:42:42 -0300 Subject: vk_buffer_cache: Add transform feedback usage to buffers --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 37 +++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 568993c58..2da3de6de 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -60,6 +60,27 @@ std::array MakeQuadIndices(u32 quad, u32 first) { } return indices; } + +vk::Buffer CreateBuffer(const Device& device, u64 size) { + VkBufferUsageFlags flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + } + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -68,21 +89,7 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, - buffer{device->GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - })}, + device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); -- cgit v1.2.3 From 77372443c3d6b20d7f78366bb4aa162f22bd7cde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:43:47 -0300 Subject: vulkan: Enable depth bounds and use it conditionally Intel devices pre-Xe don't support this. --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dfe6e6a80..d381109d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -598,13 +598,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthCompareOp = dynamic.depth_test_enable ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } static_vector cb_attachments; const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ef14e91e7..9611b480a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -682,6 +682,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); -- cgit v1.2.3 From 1148a4eac715869077ace56a9a311a167643aca3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:44:28 -0300 Subject: vulkan: Conditionally use shaderInt16 Add support for Polaris AMD devices. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cec51cc77..2a2f166c8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -249,7 +249,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .unified_descriptor_binding = true, .support_descriptor_aliasing = true, .support_int8 = true, - .support_int16 = true, + .support_int16 = device.IsShaderInt16Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From d26271b0148e4c41d87199b3e42a5702d1a6be53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 19:59:29 -0300 Subject: vk_swapchain: Avoid recreating the swapchain on each frame Recreate only when requested (or sRGB is changed) instead of tracking the frontend's size. That size is still used as a hint. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 18 ++++++------------ src/video_core/renderer_vulkan/vk_swapchain.h | 6 +++--- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d50647ba7..54c41bcaf 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -139,17 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); bool has_been_recreated = false; const auto recreate_swapchain = [&] { if (!has_been_recreated) { has_been_recreated = true; scheduler.WaitWorker(); } + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || - swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } bool needs_recreate; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index b38fd9dc2..df6da3d93 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -33,9 +33,9 @@ public: /// Presents the rendered image to the swapchain. void Present(VkSemaphore render_semaphore); - /// Returns true when the framebuffer layout has changed. - bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { - return extent.width != width || extent.height != height || current_srgb != is_srgb; + /// Returns true when the color space has changed. + bool HasColorSpaceChanged(bool is_srgb) const { + return current_srgb != is_srgb; } /// Returns true when the image has to be recreated. -- cgit v1.2.3 From 46bd362d0dfab27e8c8d49f11eb4e3f373bf8f23 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 20:37:45 -0300 Subject: fixed_pipeline_state: Use regular for loop instead of ranges for perf MSVC generates better code for it. --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 3a43c329f..1486d088a 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,9 +84,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); - std::ranges::transform(regs.rt, color_formats.begin(), - [](const auto& rt) { return static_cast(rt.format); }); + for (size_t i = 0; i < regs.rt.size(); ++i) { + color_formats[i] = static_cast(regs.rt[i].format); + } alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); -- cgit v1.2.3 From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 2 ++ src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++++++----- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 3 ++- 4 files changed, 18 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 168ffa7e9..ca59042ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include @@ -27,6 +28,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + auto func{[this, &descriptor_pool] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -75,7 +79,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, BufferCache& buffer_cache, TextureCache& texture_cache) { update_descriptor_queue.Acquire(); - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a560e382e..a6043866d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -44,6 +44,8 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d381109d6..627ca0158 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,10 +218,14 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { - return info ? info->constant_buffer_mask : 0; - }); - + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; + } + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + } auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); @@ -262,7 +266,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4068a0edc..8c81c28a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -130,7 +130,8 @@ private: std::array spv_modules; std::array stage_infos; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; -- cgit v1.2.3 From 8c954fcaee77c70583c5edc73c7c35eefcca39b0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:30 -0300 Subject: vk_pipeline_cache: Set support_derivative_control to true --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a2f166c8..a7f3619a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,6 +274,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .support_derivative_control = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), -- cgit v1.2.3 From b02c78b276f449318bdc787a35d123df01c0bc6d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 18:50:01 -0300 Subject: vk_buffer_cache: Handle null texture buffers Fixes a crash on Age of Calamity cutscenes. --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2da3de6de..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -97,6 +97,10 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast } VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + if (!device) { + // Null buffer, return a null descriptor + return VK_NULL_HANDLE; + } const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { return offset == view.offset && size == view.size && format == view.format; })}; -- cgit v1.2.3 From f45f7b5c2a869123340591cec6db58c33a5fd3ab Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 3 Jun 2021 17:42:24 -0300 Subject: vk_swapchain: Handle outdated swapchains Fixes pixelated presentation on Intel devices. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 11 +++++---- src/video_core/renderer_vulkan/vk_swapchain.cpp | 26 ++++++++++++++++------ src/video_core/renderer_vulkan/vk_swapchain.h | 14 ++++++++---- 3 files changed, 34 insertions(+), 17 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 54c41bcaf..6fda06a7e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -143,18 +143,17 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { + if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } - bool needs_recreate; + bool is_outdated; do { - needs_recreate = false; swapchain.AcquireNextImage(); - if (swapchain.NeedsRecreate()) { + is_outdated = swapchain.IsOutDated(); + if (is_outdated) { recreate_swapchain(); - needs_recreate = true; } - } while (needs_recreate); + } while (is_outdated); if (has_been_recreated) { blit_screen.Recreate(); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a71b0b01e..d990eefba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,7 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { - needs_recreate = false; + is_outdated = false; + is_suboptimal = false; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -85,11 +86,22 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { } void VKSwapchain::AcquireNextImage() { - const VkResult result = - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), - *present_semaphores[frame_index], {}, &image_index); - needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; - + const VkResult result = device.GetLogical().AcquireNextImageKHR( + *swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], + VK_NULL_HANDLE, &image_index); + switch (result) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + is_suboptimal = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); + break; + } scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); } @@ -115,7 +127,7 @@ void VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - needs_recreate = true; + is_outdated = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index df6da3d93..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -38,9 +38,14 @@ public: return current_srgb != is_srgb; } - /// Returns true when the image has to be recreated. - bool NeedsRecreate() const { - return needs_recreate; + /// Returns true when the swapchain is outdated. + bool IsOutDated() const { + return is_outdated; + } + + /// Returns true when the swapchain is suboptimal. + bool IsSubOptimal() const { + return is_suboptimal; } VkExtent2D GetSize() const { @@ -95,7 +100,8 @@ private: VkExtent2D extent{}; bool current_srgb{}; - bool needs_recreate{}; + bool is_outdated{}; + bool is_suboptimal{}; }; } // namespace Vulkan -- cgit v1.2.3 From 2a0aeaa3d283f1c7f003c956ab3079f70246b008 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 21:48:38 -0300 Subject: vk_rasterizer: Flush work on clear and dispatches --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9611b480a..e72f8426b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -189,6 +189,7 @@ void RasterizerVulkan::Clear() { if (!maxwell3d.ShouldExecute()) { return; } + FlushWork(); query_cache.UpdateCounters(); @@ -259,6 +260,8 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { + FlushWork(); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; if (!pipeline) { return; -- cgit v1.2.3 From 48aad8dc05f027c21aa0e8a68d827006d9f7a196 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 22:10:02 -0300 Subject: vk_pipeline_cache: Add asynchronous shaders --- .../renderer_vulkan/vk_graphics_pipeline.h | 6 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8c81c28a8..3f8895927 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -87,7 +87,7 @@ public: configure_func(this, is_indexed); } - GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; } @@ -96,6 +96,10 @@ public: : nullptr; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + template static auto MakeConfigureSpecFunc() { return [](GraphicsPipeline* pipeline, bool is_indexed) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a7f3619a5..741ed1a98 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -240,6 +240,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -303,7 +304,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; if (next) { current_pipeline = next; - return current_pipeline; + return BuiltPipeline(current_pipeline); } } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; @@ -318,7 +319,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline->AddTransition(pipeline.get()); } current_pipeline = pipeline.get(); - return current_pipeline; + return BuiltPipeline(current_pipeline); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -415,6 +416,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; +} + std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span envs, bool build_in_parallel) try { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4116cc73f..869c63baf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -115,6 +115,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,6 +142,8 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; + bool use_asynchronous_shaders{}; + std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From cffd4716c5ebf9b93505b5bfa96d9b407f349336 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:11:36 -0300 Subject: vk_pipeline_cache,shader_notify: Add shader notifications --- .../renderer_vulkan/vk_compute_pipeline.cpp | 12 +++++- .../renderer_vulkan/vk_compute_pipeline.h | 7 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 30 +++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 22 +++++----- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++++++++--------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 9 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 7 files changed, 83 insertions(+), 49 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ca59042ff..cc855a62e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -14,6 +14,7 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -24,14 +25,18 @@ using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* thread_worker, const Shader::Info& info_, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - auto func{[this, &descriptor_pool] { + auto func{[this, &descriptor_pool, shader_notify] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -66,6 +71,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a6043866d..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -18,6 +18,10 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { class Device; @@ -27,7 +31,8 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* thread_worker, const Shader::Info& info, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, vk::ShaderModule spv_module); ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 627ca0158..5c916c869 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -17,6 +17,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #if defined(_MSC_VER) && defined(NDEBUG) @@ -203,30 +204,30 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m } } // Anonymous namespace -GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, - BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device_, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key_, - std::array stages, - const std::array& infos) +GraphicsPipeline::GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, + VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, + std::array stages, + const std::array& infos) : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { const Shader::Info* const info{infos[stage]}; if (!info) { continue; } + stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); } - auto func{[this, &render_pass_cache, &descriptor_pool] { + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); @@ -242,6 +243,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3f8895927..40d1edabd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -20,6 +20,10 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { struct GraphicsPipelineCacheKey { @@ -64,16 +68,14 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key, - std::array stages, - const std::array& infos); + explicit GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + VideoCore::ShaderNotify* shader_notify, const Device& device, + DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, + const std::array& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 741ed1a98..e61d76490 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -235,11 +235,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, - TextureCache& texture_cache_) + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { @@ -307,19 +307,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { return BuiltPipeline(current_pipeline); } } - const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& pipeline{pair->second}; - if (is_new) { - pipeline = CreateGraphicsPipeline(); - } - if (!pipeline) { - return nullptr; - } - if (current_pipeline) { - current_pipeline->AddTransition(pipeline.get()); - } - current_pipeline = pipeline.get(); - return BuiltPipeline(current_pipeline); + return CurrentGraphicsPipelineSlowPath(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -416,6 +404,22 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); + } + if (!pipeline) { + return nullptr; + } + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); +} + GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { if (pipeline->IsBuilt()) { return pipeline; @@ -484,14 +488,16 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique( - maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; + return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, + texture_cache, notify, device, descriptor_pool, + update_descriptor_queue, thread_worker, + render_pass_cache, key, std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -550,12 +556,14 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}", key.unique_hash)}; + const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, program.info, std::move(spv_module)); + thread_worker, notify, program.info, + std::move(spv_module)); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 869c63baf..167a2ee2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -38,6 +38,10 @@ namespace Shader::IR { struct Program; } +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -104,7 +108,7 @@ public: VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, - TextureCache& texture_cache); + TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -115,6 +119,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; std::unique_ptr CreateGraphicsPipeline(); @@ -138,6 +144,7 @@ private: RenderPassCache& render_pass_cache; BufferCache& buffer_cache; TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e72f8426b..d284b3653 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -140,7 +140,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, - texture_cache), + texture_cache, gpu.ShaderNotify()), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { -- cgit v1.2.3 From d554778311c32e0a19ecdc13d7525b264d8443b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:52:04 -0300 Subject: vulkan: Use VK_EXT_provoking_vertex when available --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 4 ++-- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1 + src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 10 +++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1486d088a..f121fbf0e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,6 +84,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); @@ -91,8 +93,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 0f1eff9cd..60adae316 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -192,6 +192,7 @@ struct FixedPipelineState { BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; BitField<10, 1, u32> y_negate; + BitField<11, 1, u32> provoking_vertex_last; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5c916c869..06a80c2ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -567,9 +567,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { viewport_ci.pNext = &swizzle_ci; } + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; const VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -586,6 +593,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, -- cgit v1.2.3 From 3025b2f605df74a129f0f47aadd4247055ecd6bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:53:38 -0300 Subject: vk_rasterizer: Implement first index --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d284b3653..e339e9739 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -54,6 +54,7 @@ struct DrawParams { u32 num_instances; u32 base_vertex; u32 num_vertices; + u32 first_index; bool is_indexed; }; @@ -103,6 +104,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan .num_instances = is_instanced ? num_instances : 1, .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, + .first_index = is_indexed ? regs.index_array.first : 0, .is_indexed = is_indexed, }; if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { @@ -173,8 +175,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); } else { cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, draw_params.base_vertex, draw_params.base_instance); -- cgit v1.2.3 From cb78a1b494be2f6bc0927ed5b7a878236a3dc1c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 01:46:30 -0300 Subject: shader: Reorder shader cache directories --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e61d76490..6df4088a7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -337,22 +337,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "vulkan"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); return; } - pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + pipeline_cache_filename = base_dir / "vulkan.bin"; struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { -- cgit v1.2.3 From ea038d66538975319858f792052af1d0fa997fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 05:07:52 -0300 Subject: vulkan: Add VK_EXT_vertex_input_dynamic_state support Reduces the number of total pipelines generated on Vulkan. Tested on Super Smash Bros. Ultimate. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 67 ++++++++----- .../renderer_vulkan/fixed_pipeline_state.h | 73 ++++++++------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 107 +++++++++++++-------- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 +++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 56 +++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_state_tracker.cpp | 50 ++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 8 +- 8 files changed, 276 insertions(+), 116 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f121fbf0e..16cef8711 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -50,7 +50,7 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -60,7 +60,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; - no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); @@ -73,11 +74,11 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); @@ -93,24 +94,44 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast(attrs[i].type.Value())]; + attribute_types |= static_cast(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -126,10 +147,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (no_extended_dynamic_state != 0) { + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } - if (xfb_enabled != 0) { + if (xfb_enabled) { RefreshXfbState(xfb_state, regs); } } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 60adae316..04f34eb97 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -168,44 +168,51 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<1, 1, u32> xfb_enabled; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; - BitField<4, 1, u32> depth_enabled; - BitField<5, 5, u32> depth_format; - BitField<10, 1, u32> y_negate; - BitField<11, 1, u32> provoking_vertex_last; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + BitField<12, 1, u32> provoking_vertex_last; }; std::array color_formats; u32 alpha_test_ref; u32 point_size; - std::array binding_divisors; - std::array attributes; std::array attachments; std::array viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array attributes; + std::array binding_divisors; + DynamicState dynamic_state; VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -216,16 +223,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - if (xfb_enabled != 0) { + if (xfb_enabled) { // When transform feedback is enabled, use the whole struct return sizeof(*this); - } else if (no_extended_dynamic_state != 0) { - // Dynamic state is enabled, we can enable more - return offsetof(FixedPipelineState, xfb_state); - } else { - // No XFB, extended dynamic state enabled + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // Exclude dynamic state return offsetof(FixedPipelineState, dynamic_state); } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 06a80c2ba..ccef71f4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,39 +472,65 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (!device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = key.state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ + static_vector vertex_attributes; + if (key.state.dynamic_vertex_input) { + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!input_attributes[index].used || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT + : VK_FORMAT_R32_UINT, + .offset = 0, + }); + } + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ .binding = static_cast(index), - .divisor = key.state.binding_divisors[index], + .stride = dynamic.vertex_strides[index], + .inputRate = rate, }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = key.state.binding_divisors[index], + }); + } } - } - static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < key.state.attributes.size(); ++index) { - const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { - continue; + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !input_attributes[index].used) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -545,27 +571,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; + std::array swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, @@ -660,13 +684,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -678,6 +702,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6df4088a7..db7da5555 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,6 +109,20 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Disabled; +} + Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -123,13 +137,19 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { info.fixed_state_point_size = point_size; } - if (key.state.xfb_enabled != 0) { + if (key.state.xfb_enabled) { info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } - std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), - &CastAttributeType); + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } break; case Shader::Stage::TessellationEval: // We have to flip tessellation clockwise for some reason... @@ -298,7 +318,8 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); if (current_pipeline) { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e339e9739..855c17769 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -551,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -780,4 +783,57 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexAttribute0 + index]) { + continue; + } + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; + + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = attribute.IsConstant() + ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 + : MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1302bed02..c954fa7f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -135,6 +135,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..0ebe0473f 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, + DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, + DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, + StencilOp, StencilTestEnable, VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -134,31 +141,38 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; } } void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyViewportSwizzles(Tables& tables) { - static constexpr size_t swizzle_offset = 6; - for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = - ViewportSwizzles; +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = static_cast(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; } } } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -175,9 +189,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..1976b7e9b 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -19,6 +19,12 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias, @@ -36,8 +42,6 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, Last -- cgit v1.2.3 From 41cca8b8ad6f7ea33e74210aee4e3867ffa0622e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:24:49 -0300 Subject: vk_pipeline_cache: Skip cached pipelines with different dynamic state --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index db7da5555..b17f34cdd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -391,10 +391,16 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; + const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); + const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); + if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || + (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + return; + } workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; -- cgit v1.2.3 From dbf7cb9f90a99faa6d6ab07558d65dd113728ff1 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Mon, 14 Jun 2021 22:02:42 -0300 Subject: vk_graphics_pipeline: Fix path with no VK_EXT_extended_dynamic_state --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ccef71f4c..e02b1b7ab 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,7 +472,7 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (key.state.extended_dynamic_state) { + if (!key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; -- cgit v1.2.3 From 8fb204893430de2d5c30e008e98db313f890f447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 02:43:01 -0300 Subject: vk_rasterizer: Exit render passes on fragment barriers --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 855c17769..c57e16c50 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -440,6 +440,7 @@ void RasterizerVulkan::WaitForIdle() { void RasterizerVulkan::FragmentBarrier() { // We already put barriers when a render pass finishes + scheduler.RequestOutsideRenderPassOperationContext(); } void RasterizerVulkan::TiledCacheBarrier() { -- cgit v1.2.3 From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b17f34cdd..0b6fe8e2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,7 +20,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/program_header.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72e6f4207..dc028306a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -90,7 +90,7 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso return {}; } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { if (attr.enabled == 0) { return Shader::AttributeType::Disabled; } @@ -124,9 +124,15 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; - + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; @@ -499,6 +505,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; + const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -511,7 +518,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -519,6 +526,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } + previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; -- cgit v1.2.3 From ca67077ca87772b4b4ac61d08f5b2c60616348e0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 21:14:57 -0300 Subject: vk_graphics_pipeline: Use VK_KHR_push_descriptor when available ~51% faster on Nvidia compared to previous method. --- src/video_core/renderer_vulkan/pipeline_helper.h | 32 ++++++++++++++++------ .../renderer_vulkan/vk_compute_pipeline.cpp | 8 ++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 ++++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + 4 files changed, 45 insertions(+), 24 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index c6e5e059b..4847db6b6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -16,38 +16,50 @@ #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { class DescriptorLayoutBuilder { public: - DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} - vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { if (bindings.empty()) { return nullptr; } - return device->CreateDescriptorSetLayout({ + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, - .flags = 0, + .flags = flags, .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }); } vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) const { + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { if (entries.empty()) { return nullptr; } - return device->CreateDescriptorUpdateTemplateKHR({ + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, .pNext = nullptr, .flags = 0, .descriptorUpdateEntryCount = static_cast(entries.size()), .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .templateType = type, .descriptorSetLayout = descriptor_set_layout, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .pipelineLayout = pipeline_layout, @@ -56,7 +68,7 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { - return device->CreatePipelineLayout({ + return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -97,14 +109,16 @@ private: .stride = sizeof(DescriptorUpdateEntry), }); ++binding; + num_descriptors += descriptors[i].count; offset += sizeof(DescriptorUpdateEntry); } } - const vk::Device* device{}; + const Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; + u32 num_descriptors{}; size_t offset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cc855a62e..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript uniform_buffer_sizes.begin()); auto func{[this, &descriptor_pool, shader_notify] { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = - builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e02b1b7ab..2b59a9d88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; constexpr size_t MAX_IMAGE_ELEMENTS = 64; DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline( } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); - descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); @@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() { if (!descriptor_set_layout) { return; } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_set, nullptr); + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } }); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 40d1edabd..622267147 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -148,6 +148,7 @@ private: std::condition_variable build_condvar; std::mutex build_mutex; std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan -- cgit v1.2.3 From fcff19e0fa3d21130bc7b6cd50a10db102b5d4d7 Mon Sep 17 00:00:00 2001 From: ameerj Date: Thu, 17 Jun 2021 23:12:41 -0400 Subject: shaders: Allow shader notify when async shaders is disabled --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dc028306a..e83628c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -529,11 +529,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, - texture_cache, notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -596,9 +595,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, notify, program.info, + thread_worker, &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { -- cgit v1.2.3 From f5db8c74405c93b52efbdef318790bd9ec4661c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:23:50 -0300 Subject: gl_shader_cache: Check previous pipeline before checking hash map Port optimization from Vulkan. --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 42da2960b..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -146,12 +146,11 @@ private: BufferCache& buffer_cache; TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - bool use_asynchronous_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e83628c13..ec06b124f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,7 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw }; host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), - .support_int64 = true, + .support_int64 = device.IsShaderInt64Supported(), }; } -- cgit v1.2.3 From 395bed3a0af90a53be44e81eadd06f4931c8e933 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 02:41:00 -0300 Subject: shader: Unify shader stage types --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 15 ++++++++------- src/video_core/renderer_vulkan/maxwell_to_vk.h | 3 ++- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f0b0b8ec..8f9b9a11a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 50a599c11..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2b59a9d88..9eb353a88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -737,7 +737,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), .module = *spv_modules[stage], .pName = "main", .pSpecializationInfo = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c57e16c50..f04c3394c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,8 +58,6 @@ struct DrawParams { bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; -- cgit v1.2.3 From 4f052a1f393d45843eabc237e21757be15f20062 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 03:32:41 -0300 Subject: vk_graphics_pipeline: Implement conservative rendering --- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++++++++++++++------ 3 files changed, 26 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 16cef8711..7563dc462 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -87,6 +87,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, depth_format.Assign(static_cast(regs.zeta.format)); y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04f34eb97..66b57b636 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -194,6 +194,7 @@ struct FixedPipelineState { BitField<6, 5, u32> depth_format; BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9eb353a88..70e183e65 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -599,16 +599,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pScissors = nullptr, }; - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 - ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT - : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, - }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, + .pNext = nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -625,6 +618,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 16 ++++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); -- cgit v1.2.3 From 57a8921e01a90ff5993079dd638a6c48e5781756 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:21:51 -0300 Subject: vk_graphics_pipeline: Implement line width --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 1 + src/video_core/renderer_vulkan/vk_state_tracker.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 7 ++++++- 5 files changed, 28 insertions(+), 6 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6d664ed6b..3363a6877 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -705,11 +705,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; if (key.state.extended_dynamic_state) { static constexpr std::array extended{ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f04c3394c..bb7301c53 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -541,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -676,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c954fa7f8..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -125,6 +125,7 @@ private: void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 0ebe0473f..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,10 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, - DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, - DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, - StencilOp, StencilTestEnable, VertexBuffers, VertexInput, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -86,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -180,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1976b7e9b..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -31,6 +31,7 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -44,7 +45,7 @@ enum : u8 { Blending, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits::max()); @@ -93,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } -- cgit v1.2.3 From f94f0be5215369a6985247ad936d9d9f43c9b140 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:25:19 -0300 Subject: vk_graphics_pipeline: Implement smooth lines --- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 7563dc462..d089da8a4 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -88,6 +88,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 66b57b636..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -195,6 +195,7 @@ struct FixedPipelineState { BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3363a6877..f0ae0b0d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -80,6 +80,14 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); } +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) == line_topologies.end(); +} + VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { union Swizzle { u32 raw; @@ -616,6 +624,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -632,6 +650,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } if (device.IsExtConservativeRasterizationSupported()) { conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); } -- cgit v1.2.3 From fba6bd92d456b4d472ed37e663006fafeef154a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 17:46:01 -0300 Subject: vk_rasterizer: Workaround bug in VK_EXT_vertex_input_dynamic_state Workaround potential bug on Nvidia's driver where only updating high attributes leaves low attributes out dated. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 +++++++++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d089da8a4..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -128,7 +128,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.enabled.Assign(input.constant ? 0 : 1); attribute.buffer.Assign(input.buffer); attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bb7301c53..99576b826 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -801,25 +801,30 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) boost::container::static_vector bindings; boost::container::static_vector attributes; + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexAttribute0 + index]) { - continue; + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = attribute.IsConstant() - ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 - : MaxwellToVK::VertexFormat(attribute.type, attribute.size), - .offset = attribute.offset, - }); + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } } for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { if (!dirty[Dirty::VertexBinding0 + index]) { -- cgit v1.2.3 From 8722668b3c027f0132d0be07e867247debd08d30 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:42:17 -0300 Subject: emit_spirv: Workaround VK_KHR_shader_float_controls on fp16 Nvidia Fix regression on Fire Emblem: Three Houses when using native fp16. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7aaa40ef2..87b843e3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,6 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, .ignore_nan_fp_comparisons = false, }; host_info = Shader::HostTranslateInfo{ -- cgit v1.2.3 From 11f04f1022d0820a1fdba38221ecd38f19d86d9e Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 4 Jul 2021 00:34:53 -0400 Subject: shader: Ignore global memory ops on devices lacking int64 support --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87b843e3d..a2646fc6d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -280,6 +280,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_descriptor_aliasing = true, .support_int8 = true, .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From be54aad1c40bb50c71e7bcd6465c2fd372c11cb7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Jul 2021 00:18:30 -0300 Subject: maxwell_to_vk: Add R16_SNORM --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f9b9a11a..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM -- cgit v1.2.3 From 8390286a89dd259f0ff44cc95fc20d017b58046f Mon Sep 17 00:00:00 2001 From: ameerj Date: Fri, 9 Jul 2021 19:00:11 -0400 Subject: renderers: Disable async shader compilation The current implementation is prone to causing graphical issues. Disable until a better solution is implemented. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2646fc6d..39db35175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -269,7 +269,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_asynchronous_shaders{false}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From 41493fbe89200a4a8321dec7b313872435c57df7 Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 11 Jul 2021 01:04:52 -0400 Subject: renderers: Fix clang formatting --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6fda06a7e..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f0ae0b0d6..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -503,9 +503,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { vertex_attributes.push_back({ .location = static_cast(index), .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, .offset = 0, }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 99576b826..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,7 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); -- cgit v1.2.3 From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 622267147..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -104,9 +104,7 @@ public: template static auto MakeConfigureSpecFunc() { - return [](GraphicsPipeline* pipeline, bool is_indexed) { - pipeline->ConfigureImpl(is_indexed); - }; + return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl(is_indexed); }; } private: -- cgit v1.2.3 From 258f35515d61d01049d2e433146cab808837bb7d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 21:07:12 -0300 Subject: shader_environment: Receive cache version from outside This allows us invalidating OpenGL and Vulkan separately in the future. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 39db35175..2ce8b4156 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,6 +54,8 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -434,7 +436,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -562,7 +565,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } @@ -581,7 +584,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } serialization_thread.QueueWork([this, key, env = std::move(env)] { SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } -- cgit v1.2.3 From 3c6d440015d7ffb81eedbfcd7ee1aab1ea87ee2a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:08:06 -0300 Subject: Revert "renderers: Disable async shader compilation" This reverts commit 4a152767286717fa69bfc94846a124a366f70065. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2ce8b4156..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{false}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From a55ff22900c5261915eb8b88f2c0f18a4eb6f30f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:52:29 -0300 Subject: vulkan/blit_image: Commit descriptor sets within worker thread Fixes race condition caused. The descriptor pool is not thread safe, so we have to commit descriptor sets within the same thread. --- src/video_core/renderer_vulkan/blit_image.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_vulkan') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 4058f62cd..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -376,11 +376,11 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, + src_view](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, @@ -402,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); - const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -448,14 +447,12 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ .width = src_image_view.size.width, .height = src_image_view.size.height, }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -476,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb .tex_scale = {viewport.width, viewport.height}, .tex_offset = {0.0f, 0.0f}, }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); // TODO: Barriers -- cgit v1.2.3