commit    260743f371236f7c57b01334b1c3474b15a47c39
tree      312d89fa8215199ef5f7ec1fc84b025df526e107
parent    shader: Implement DADD
author    2021-03-19 19:28:31 -0300
committer 2021-07-22 21:51:23 -0400
shader: Add partial rasterizer integration
Diffstat: 54 files changed, 1927 insertions(+), 566 deletions(-)
```diff
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index b870e9937..31c394106 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/common_funcs.h
     frontend/maxwell/translate/impl/condition_code_set.cpp
     frontend/maxwell/translate/impl/double_add.cpp
+    frontend/maxwell/translate/impl/exit_program.cpp
     frontend/maxwell/translate/impl/find_leading_one.cpp
     frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC
     ir_opt/texture_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
+    program_header.h
     profile.h
-    recompiler.cpp
-    recompiler.h
     shader_info.h
 )
 
```
```diff
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 204389d74..6c79b611b 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
     }
 }
 
-EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
+EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
     : Sirit::Module(0x00010000), profile{profile_} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
-    DefineSpecialVariables(program.info);
-
-    u32 binding{};
+    DefineInterfaces(program.info, program.stage);
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
-
     DefineLabels(program);
 }
 
@@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) {
         return Constant(F32[1], value.F32());
     case IR::Type::F64:
         return Constant(F64[1], value.F64());
+    case IR::Type::Label:
+        return value.Label()->Definition<Id>();
     default:
         throw NotImplementedException("Immediate type {}", value.Type());
     }
@@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) {
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
+    input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
+    output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
+
     if (info.uses_int8) {
         AddCapability(spv::Capability::Int8);
         U8 = Name(TypeInt(8, false), "u8");
@@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() {
     u32_zero_value = Constant(U32[1], 0U);
 }
 
-void EmitContext::DefineSpecialVariables(const Info& info) {
-    const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) {
-        const Id pointer_type{TypePointer(storage_class, type)};
-        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)};
-        Decorate(id, spv::Decoration::BuiltIn, builtin);
-        return id;
-    }};
+void EmitContext::DefineInterfaces(const Info& info, Stage stage) {
+    const auto define{
+        [this](Id type, std::optional<spv::BuiltIn> builtin, spv::StorageClass storage_class) {
+            const Id pointer_type{TypePointer(storage_class, type)};
+            const Id id{AddGlobalVariable(pointer_type, storage_class)};
+            if (builtin) {
+                Decorate(id, spv::Decoration::BuiltIn, *builtin);
+            }
+            interfaces.push_back(id);
+            return id;
+        }};
     using namespace std::placeholders;
     const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)};
+    const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)};
 
     if (info.uses_workgroup_id) {
         workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId);
@@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) {
     if (info.uses_local_invocation_id) {
         local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId);
     }
+    if (info.loads_position) {
+        const bool is_fragment{stage == Stage::Fragment};
+        const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
+        input_position = define_input(F32[4], built_in);
+    }
+    for (size_t i = 0; i < info.loads_generics.size(); ++i) {
+        if (info.loads_generics[i]) {
+            // FIXME: Declare size from input
+            input_generics[i] = define_input(F32[4], std::nullopt);
+            Decorate(input_generics[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(input_generics[i], fmt::format("in_attr{}", i));
+        }
+    }
+    if (info.stores_position) {
+        output_position = define_output(F32[4], spv::BuiltIn::Position);
+    }
+    for (size_t i = 0; i < info.stores_generics.size(); ++i) {
+        if (info.stores_generics[i]) {
+            output_generics[i] = define_output(F32[4], std::nullopt);
+            Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(output_generics[i], fmt::format("out_attr{}", i));
+        }
+    }
+    if (stage == Stage::Fragment) {
+        for (size_t i = 0; i < 8; ++i) {
+            if (!info.stores_frag_color[i]) {
+                continue;
+            }
+            frag_color[i] = define_output(F32[4], std::nullopt);
+            Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(frag_color[i], fmt::format("frag_color{}", i));
+        }
+    }
 }
 
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
```
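The single `define` lambda above handles both interface kinds the backend now needs: a built-in gets a `BuiltIn` decoration, while a generic varying is left for the caller to decorate with a `Location`. A minimal standalone analogue of that pattern, using stand-in types rather than the real Sirit API:

```cpp
// Stand-in types for illustration only; the real code decorates Sirit Ids.
#include <cstdio>
#include <functional>
#include <optional>

enum class StorageClass { Input, Output };
enum class BuiltIn { Position, FragCoord };

void Define(std::optional<BuiltIn> builtin, StorageClass storage_class) {
    const char* const direction{storage_class == StorageClass::Input ? "in" : "out"};
    if (builtin) {
        // Builtins carry a BuiltIn decoration and no explicit location
        std::printf("%s variable decorated BuiltIn(%d)\n", direction, static_cast<int>(*builtin));
    } else {
        // Generic attributes receive a Location decoration from the caller
        std::printf("%s variable awaiting a Location decoration\n", direction);
    }
}

int main() {
    using namespace std::placeholders;
    // std::bind fixes the storage class, mirroring define_input/define_output
    const auto define_input{std::bind(Define, _1, StorageClass::Input)};
    const auto define_output{std::bind(Define, _1, StorageClass::Output)};
    define_input(BuiltIn::FragCoord); // e.g. loads_position in a fragment shader
    define_output(std::nullopt);      // e.g. a generic out_attr with a Location
}
```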
```diff
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 35eca258a..2d7961ac3 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -46,7 +46,7 @@ struct UniformDefinitions {
 
 class EmitContext final : public Sirit::Module {
 public:
-    explicit EmitContext(const Profile& profile, IR::Program& program);
+    explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
     ~EmitContext();
 
     [[nodiscard]] Id Def(const IR::Value& value);
@@ -71,6 +71,9 @@ public:
 
     UniformDefinitions uniform_types;
 
+    Id input_f32{};
+    Id output_f32{};
+
     Id storage_u32{};
 
     std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
@@ -80,10 +83,21 @@ public:
     Id workgroup_id{};
     Id local_invocation_id{};
 
+    Id input_position{};
+    std::array<Id, 32> input_generics{};
+
+    Id output_position{};
+    std::array<Id, 32> output_generics{};
+
+    std::array<Id, 8> frag_color{};
+    Id frag_depth{};
+
+    std::vector<Id> interfaces;
+
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
-    void DefineSpecialVariables(const Info& info);
+    void DefineInterfaces(const Info& info, Stage stage);
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
                                Id type, char type_char, u32 element_size);
```
```diff
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 50c0f7243..b8978b94a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
         return arg.U32();
     } else if constexpr (std::is_same_v<ArgType, IR::Block*>) {
         return arg.Label();
+    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+        return arg.Attribute();
     }
 }
 
@@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) {
     }
 } // Anonymous namespace
 
-std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) {
-    EmitContext ctx{profile, program};
+std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program,
+                           u32& binding) {
+    EmitContext ctx{profile, program, binding};
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
     for (IR::Block* const block : program.blocks) {
@@ -208,28 +211,41 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
         }
     }
     ctx.OpFunctionEnd();
-    boost::container::small_vector<Id, 32> interfaces;
-    const Info& info{program.info};
-    if (info.uses_workgroup_id) {
-        interfaces.push_back(ctx.workgroup_id);
+
+    const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
+    spv::ExecutionModel execution_model{};
+    switch (env.ShaderStage()) {
+    case Shader::Stage::Compute: {
+        const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
+        execution_model = spv::ExecutionModel::GLCompute;
+        ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0],
+                             workgroup_size[1], workgroup_size[2]);
+        break;
     }
-    if (info.uses_local_invocation_id) {
-        interfaces.push_back(ctx.local_invocation_id);
+    case Shader::Stage::VertexB:
+        execution_model = spv::ExecutionModel::Vertex;
+        break;
+    case Shader::Stage::Fragment:
+        execution_model = spv::ExecutionModel::Fragment;
+        ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft);
+        break;
+    default:
+        throw NotImplementedException("Stage {}", env.ShaderStage());
     }
-    const std::span interfaces_span(interfaces.data(), interfaces.size());
-    ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span);
-
-    const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
-    ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1],
-                         workgroup_size[2]);
+    ctx.AddEntryPoint(execution_model, func, "main", interfaces);
 
     SetupDenormControl(profile, program, ctx, func);
+    const Info& info{program.info};
     if (info.uses_sampled_1d) {
         ctx.AddCapability(spv::Capability::Sampled1D);
     }
     if (info.uses_sparse_residency) {
         ctx.AddCapability(spv::Capability::SparseResidency);
     }
+    if (info.uses_demote_to_helper_invocation) {
+        ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
+        ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+    }
     // TODO: Track this usage
     ctx.AddCapability(spv::Capability::ImageGatherExtended);
 
```
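Threading `u32& binding` through `EmitContext` and `EmitSPIRV` lets one descriptor-binding counter span several compilations. A hypothetical caller sketch (the function and its container choices are assumed, not part of this commit), showing how a pipeline could compile its stages against a shared counter so each stage's constant buffers, storage buffers, and textures land on disjoint bindings:

```cpp
#include <cstddef>
#include <span>
#include <vector>

#include "shader_recompiler/backend/spirv/emit_spirv.h"

// Assumed helper, not part of this commit: compiles every stage of a pipeline
// against a single binding counter shared across all of them.
std::vector<std::vector<u32>> CompilePipeline(const Shader::Profile& profile,
                                              std::span<Shader::Environment* const> envs,
                                              std::span<Shader::IR::Program> programs) {
    u32 binding{};  // advanced inside EmitSPIRV for every declared resource
    std::vector<std::vector<u32>> modules;
    for (size_t index = 0; index < programs.size(); ++index) {
        modules.push_back(
            Shader::Backend::SPIRV::EmitSPIRV(profile, *envs[index], programs[index], binding));
    }
    return modules;  // binding now equals the total number of bindings used
}
```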
```diff
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 89566c83d..ae121f534 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -16,18 +16,18 @@
 namespace Shader::Backend::SPIRV {
 
 [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env,
-                                         IR::Program& program);
+                                         IR::Program& program, u32& binding);
 
 // Microinstruction emitters
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
 void EmitVoid(EmitContext& ctx);
 Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-void EmitBranch(EmitContext& ctx, IR::Block* label);
-void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                           IR::Block* false_label);
-void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label);
-void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label);
+void EmitBranch(EmitContext& ctx, Id label);
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitGetRegister(EmitContext& ctx);
 void EmitSetRegister(EmitContext& ctx);
 void EmitGetPred(EmitContext& ctx);
@@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
-void EmitGetAttribute(EmitContext& ctx);
-void EmitSetAttribute(EmitContext& ctx);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
 void EmitGetAttributeIndexed(EmitContext& ctx);
 void EmitSetAttributeIndexed(EmitContext& ctx);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
 void EmitGetZFlag(EmitContext& ctx);
 void EmitGetSFlag(EmitContext& ctx);
 void EmitGetCFlag(EmitContext& ctx);
```
```diff
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 125b58cf7..02d115740 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -5,6 +5,43 @@
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
 
 namespace Shader::Backend::SPIRV {
+namespace {
+Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
+    const u32 element{static_cast<u32>(attr) % 4};
+    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
+    if (IR::IsGeneric(attr)) {
+        const u32 index{IR::GenericAttributeIndex(attr)};
+        return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id());
+    }
+    switch (attr) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id());
+    default:
+        throw NotImplementedException("Read attribute {}", attr);
+    }
+}
+
+Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
+    const u32 element{static_cast<u32>(attr) % 4};
+    const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
+    if (IR::IsGeneric(attr)) {
+        const u32 index{IR::GenericAttributeIndex(attr)};
+        return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id());
+    }
+    switch (attr) {
+    case IR::Attribute::PositionX:
+    case IR::Attribute::PositionY:
+    case IR::Attribute::PositionZ:
+    case IR::Attribute::PositionW:
+        return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id());
+    default:
+        throw NotImplementedException("Write attribute {}", attr);
+    }
+}
+} // Anonymous namespace
 
 void EmitGetRegister(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
@@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
     return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset);
 }
 
-void EmitGetAttribute(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
+    return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr));
 }
 
-void EmitSetAttribute(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
+    ctx.OpStore(OutputAttrPointer(ctx, attr), value);
 }
 
 void EmitGetAttributeIndexed(EmitContext&) {
@@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
+    const Id component_id{ctx.Constant(ctx.U32[1], component)};
+    const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
+    ctx.OpStore(pointer, value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, Id value) {
+    ctx.OpStore(ctx.frag_depth, value);
+}
+
 void EmitGetZFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
```
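Both helpers rely on the same arithmetic: an `IR::Attribute` is a 32-bit-word index into the attribute space, so `% 4` selects the vec4 component and `GenericAttributeIndex` recovers the location. A standalone illustration of that mapping; the concrete value of `Generic0X` is an assumption here, the real numbering lives in `shader_recompiler/frontend/ir/attribute.h`:

```cpp
#include <cstdint>
#include <cstdio>

// Assumed, 4-aligned base value for the first generic attribute word; the
// `% 4` component math only requires the base to be a multiple of 4.
enum class Attribute : std::uint64_t {
    Generic0X = 32,
    Generic0Y, Generic0Z, Generic0W,
    Generic1X, Generic1Y, Generic1Z, Generic1W,
};

int main() {
    const Attribute attr{Attribute::Generic1Z};
    const auto raw{static_cast<std::uint32_t>(attr)};
    const std::uint32_t element{raw % 4};  // component within the vec4 (z -> 2)
    const std::uint32_t index{(raw - static_cast<std::uint32_t>(Attribute::Generic0X)) / 4};
    std::printf("in_attr%u, component %u\n", index, element);  // in_attr1, component 2
}
```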
```diff
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 48755b827..6b81f0169 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -6,26 +6,29 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitBranch(EmitContext& ctx, IR::Block* label) {
-    ctx.OpBranch(label->Definition<Id>());
+void EmitBranch(EmitContext& ctx, Id label) {
+    ctx.OpBranch(label);
 }
 
-void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                           IR::Block* false_label) {
-    ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>());
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) {
+    ctx.OpBranchConditional(condition, true_label, false_label);
 }
 
-void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) {
-    ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(),
-                    spv::LoopControlMask::MaskNone);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) {
+    ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
 }
 
-void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) {
-    ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label) {
+    ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
 }
 
 void EmitReturn(EmitContext& ctx) {
     ctx.OpReturn();
 }
 
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) {
+    ctx.OpDemoteToHelperInvocationEXT();
+    ctx.OpBranch(continue_label);
+}
+
 } // namespace Shader::Backend::SPIRV
```
```diff
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0fcb68050..1fcaa56dd 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -3,6 +3,8 @@
 #include <array>
 
 #include "common/common_types.h"
+#include "shader_recompiler/program_header.h"
+#include "shader_recompiler/stage.h"
 
 namespace Shader {
 
@@ -15,6 +17,18 @@ public:
     [[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
 
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
+
+    [[nodiscard]] const ProgramHeader& SPH() const noexcept {
+        return sph;
+    }
+
+    [[nodiscard]] Stage ShaderStage() const noexcept {
+        return stage;
+    }
+
+protected:
+    ProgramHeader sph{};
+    Stage stage{};
 };
 
 } // namespace Shader
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 2fb7d576f..4811242ea 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept {
     return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
 }
 
-int GenericAttributeIndex(Attribute attribute) {
+u32 GenericAttributeIndex(Attribute attribute) {
     if (!IsGeneric(attribute)) {
         throw InvalidArgument("Attribute is not generic {}", attribute);
     }
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
index bb2cad6af..34ec7e0cd 100644
--- a/src/shader_recompiler/frontend/ir/attribute.h
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -224,7 +224,7 @@ enum class Attribute : u64 {
 
 [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
 
-[[nodiscard]] int GenericAttributeIndex(Attribute attribute);
+[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
 
 [[nodiscard]] std::string NameOf(Attribute attribute);
 
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 958282160..672836c0b 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -82,6 +82,12 @@ void IREmitter::Return() {
     Inst(Opcode::Return);
 }
 
+void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
+    block->SetBranch(continue_label);
+    continue_label->AddImmediatePredecessor(block);
+    Inst(Opcode::DemoteToHelperInvocation, continue_label);
+}
+
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
@@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
 
+void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
+    Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
+}
+
+void IREmitter::SetFragDepth(const F32& value) {
+    Inst(Opcode::SetFragDepth, value);
+}
+
 U32 IREmitter::WorkgroupIdX() {
     return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
 }
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 05263fe8b..72af5db37 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -36,6 +36,7 @@ public:
     void LoopMerge(Block* merge_block, Block* continue_target);
     void SelectionMerge(Block* merge_block);
     void Return();
+    void DemoteToHelperInvocation(Block* continue_label);
 
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
@@ -67,6 +68,9 @@ public:
     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
     void SetAttribute(IR::Attribute attribute, const F32& value);
 
+    void SetFragColor(u32 index, u32 component, const F32& value);
+    void SetFragDepth(const F32& value);
+
     [[nodiscard]] U32 WorkgroupIdX();
     [[nodiscard]] U32 WorkgroupIdY();
     [[nodiscard]] U32 WorkgroupIdZ();
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 5946105d2..21b7d8a9f 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::LoopMerge:
     case Opcode::SelectionMerge:
     case Opcode::Return:
+    case Opcode::DemoteToHelperInvocation:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
+    case Opcode::SetFragColor:
+    case Opcode::SetFragDepth:
     case Opcode::WriteGlobalU8:
     case Opcode::WriteGlobalS8:
     case Opcode::WriteGlobalU16:
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 9052a4903..593faca52 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1,
 OPCODE(LoopMerge, Void, Label, Label, )
 OPCODE(SelectionMerge, Void, Label, )
 OPCODE(Return, Void, )
+OPCODE(DemoteToHelperInvocation, Void, Label, )
 
 // Context getters/setters
 OPCODE(GetRegister, U32, Reg, )
@@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32,
 OPCODE(GetCbufU32, U32, U32, U32, )
 OPCODE(GetCbufF32, F32, U32, U32, )
 OPCODE(GetCbufU64, U64, U32, U32, )
-OPCODE(GetAttribute, U32, Attribute, )
-OPCODE(SetAttribute, Void, Attribute, U32, )
-OPCODE(GetAttributeIndexed, U32, U32, )
-OPCODE(SetAttributeIndexed, Void, U32, U32, )
+OPCODE(GetAttribute, F32, Attribute, )
+OPCODE(SetAttribute, Void, Attribute, F32, )
+OPCODE(GetAttributeIndexed, F32, U32, )
+OPCODE(SetAttributeIndexed, Void, U32, F32, )
+OPCODE(SetFragColor, Void, U32, U32, F32, )
+OPCODE(SetFragDepth, Void, F32, )
 OPCODE(GetZFlag, U1, Void, )
 OPCODE(GetSFlag, U1, Void, )
 OPCODE(GetCFlag, U1, Void, )
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index bce8b19b3..733513c8b 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -10,6 +10,7 @@
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
 
 namespace Shader::IR {
 
@@ -17,6 +18,7 @@ struct Program {
     BlockList blocks;
     BlockList post_order_blocks;
     Info info;
+    Stage stage{};
 };
 
 [[nodiscard]] std::string DumpProgram(const Program& program);
```
```diff
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 8fea05f7b..3845ec5fb 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256;
     return reg + (-num);
 }
 
-[[nodiscard]] constexpr Reg operator++(Reg& reg) {
+constexpr Reg operator++(Reg& reg) {
     reg = reg + 1;
     return reg;
 }
 
-[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
+constexpr Reg operator++(Reg& reg, int) {
     const Reg copy{reg};
     reg = reg + 1;
     return copy;
```
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 715c0e92d..4f6707fae 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) {
     case Opcode::EXIT:
     case Opcode::JMP:
     case Opcode::JMX:
+    case Opcode::KIL:
     case Opcode::BRK:
     case Opcode::CONT:
     case Opcode::LONGJMP:
@@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         block->end = pc;
         return AnalysisState::Branch;
     }
+    case Opcode::KIL: {
+        const Predicate pred{inst.Pred()};
+        const auto ir_pred{static_cast<IR::Pred>(pred.index)};
+        const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
+        AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
+        return AnalysisState::Branch;
+    }
     case Opcode::PBK:
     case Opcode::PCNT:
     case Opcode::PEXIT:
@@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         return AnalysisState::Continue;
     }
     const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
-    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true);
+    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
     return AnalysisState::Branch;
 }
 
 void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
-                          EndClass insn_end_class, IR::Condition cond,
-                          bool visit_conditional_inst) {
+                          EndClass insn_end_class, IR::Condition cond) {
     if (block->begin != pc) {
         // If the block doesn't start in the conditional instruction
         // mark it as a label to visit it later
@@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
     // Impersonate the visited block with a virtual block
     *block = std::move(virtual_block);
     // Set the end properties of the conditional instruction
-    conditional_block->end = visit_conditional_inst ? (pc + 1) : pc;
+    conditional_block->end = pc + 1;
     conditional_block->end_class = insn_end_class;
     // Add a label to the instruction after the conditional instruction
     Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
     // Branch to the next instruction from the virtual block
     block->branch_false = endif_block;
-    // And branch to it from the conditional instruction if it is a branch
-    if (insn_end_class == EndClass::Branch) {
+    // And branch to it from the conditional instruction if it is a branch or a kill instruction
+    // Kill instructions are considered a branch because they demote to a helper invocation and
+    // execution may continue.
+    if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
         conditional_block->cond = IR::Condition{true};
         conditional_block->branch_true = endif_block;
         conditional_block->branch_false = nullptr;
@@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
         throw NotImplementedException("Conditional EXIT with PEXIT token");
     }
     const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
-    AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false);
+    AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
     return AnalysisState::Branch;
 }
 if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
@@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
     block->branch_false = nullptr;
     return AnalysisState::Branch;
 }
-    block->end = pc;
+    block->end = pc + 1;
     block->end_class = EndClass::Exit;
     return AnalysisState::Branch;
 }
@@ -505,6 +514,12 @@ std::string CFG::Dot() const {
                                node_uid);
             ++node_uid;
             break;
+        case EndClass::Kill:
+            dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+            dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=striped];\n",
+                               node_uid);
+            ++node_uid;
+            break;
         }
     }
     if (function.entrypoint == 8) {
```
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index fe74f210f..22f134194 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -29,6 +29,7 @@ enum class EndClass {
     Call,
     Exit,
     Return,
+    Kill,
 };
 
 enum class Token {
@@ -130,7 +131,7 @@ private:
     AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
 
     void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
-                         IR::Condition cond, bool visit_conditional_inst);
+                         IR::Condition cond);
 
     /// Return true when the branch instruction is confirmed to be a branch
     bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
```
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 8bfa64326..0074eb89b 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     IR::Program program;
     program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
     program.post_order_blocks = PostOrder(program.blocks);
+    program.stage = env.ShaderStage();
     RemoveUnreachableBlocks(program);
 
     // Replace instructions before the SSA rewrite
```
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 5f5d9cf17..cec03e73e 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -45,6 +45,7 @@ enum class StatementType {
     Loop,
     Break,
     Return,
+    Kill,
     Function,
     Identity,
     Not,
@@ -70,6 +71,7 @@ struct If {};
 struct Loop {};
 struct Break {};
 struct Return {};
+struct Kill {};
 struct FunctionTag {};
 struct Identity {};
 struct Not {};
@@ -93,6 +95,7 @@ struct Statement : ListBaseHook {
     Statement(Break, Statement* cond_, Statement* up_)
         : cond{cond_}, up{up_}, type{StatementType::Break} {}
     Statement(Return) : type{StatementType::Return} {}
+    Statement(Kill) : type{StatementType::Kill} {}
     Statement(FunctionTag) : children{}, type{StatementType::Function} {}
     Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {}
     Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {}
@@ -174,6 +177,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) {
         case StatementType::Return:
             ret += fmt::format("{} return;\n", indent);
             break;
+        case StatementType::Kill:
+            ret += fmt::format("{} kill;\n", indent);
+            break;
         case StatementType::SetVariable:
             ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
             break;
@@ -424,6 +430,9 @@ private:
             gotos.push_back(root.insert(ip, *goto_stmt));
             break;
         }
+        case Flow::EndClass::Kill:
+            root.insert(ip, *pool.Create(Kill{}));
+            break;
         }
     }
 
@@ -729,6 +738,15 @@ private:
             current_block = nullptr;
             break;
         }
+        case StatementType::Kill: {
+            if (!current_block) {
+                current_block = block_pool.Create(inst_pool);
+                block_list.push_back(current_block);
+            }
+            IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block);
+            current_block = nullptr;
+            break;
+        }
         default:
             throw NotImplementedException("Statement type {}", stmt.type);
         }
```
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
deleted file mode 100644
index e98bbd0d1..000000000
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-#include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
-
-namespace Shader::Maxwell {
-
-void TranslatorVisitor::EXIT(u64) {
-    ir.Exit();
-}
-
-} // namespace Shader::Maxwell
```
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..ea9b33da9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+    const ProgramHeader sph{v.env.SPH()};
+    IR::Reg src_reg{IR::Reg::R0};
+    for (u32 render_target = 0; render_target < 8; ++render_target) {
+        const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+        for (u32 component = 0; component < 4; ++component) {
+            if (!mask[component]) {
+                continue;
+            }
+            v.ir.SetFragColor(render_target, component, v.F(src_reg));
+            ++src_reg;
+        }
+    }
+    if (sph.ps.omap.sample_mask != 0) {
+        throw NotImplementedException("Sample mask");
+    }
+    if (sph.ps.omap.depth != 0) {
+        throw NotImplementedException("Fragment depth");
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+    switch (env.ShaderStage()) {
+    case Stage::Fragment:
+        ExitFragment(*this);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Shader::Maxwell
```
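`ExitFragment` packs the enabled components of successive render targets into consecutive registers starting at `R0`. A runnable sketch of just that packing, with made-up component masks:

```cpp
#include <array>
#include <cstddef>
#include <cstdio>

int main() {
    // Hypothetical masks: render target 0 writes RGBA, render target 1 only RG
    const std::array<std::array<bool, 4>, 2> masks{{
        {true, true, true, true},
        {true, true, false, false},
    }};
    int src_reg{0};  // mirrors IR::Reg::R0 in ExitFragment
    for (std::size_t render_target = 0; render_target < masks.size(); ++render_target) {
        for (std::size_t component = 0; component < 4; ++component) {
            if (!masks[render_target][component]) {
                continue;
            }
            // prints frag_color0.x = R0 ... frag_color1.y = R5
            std::printf("frag_color%zu.%c = R%d\n", render_target, "xyzw"[component], src_reg++);
        }
    }
}
```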
```diff
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index e3e298c3b..ed81d9c36 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -108,7 +108,7 @@ public:
     void DSETP_reg(u64 insn);
     void DSETP_cbuf(u64 insn);
     void DSETP_imm(u64 insn);
-    void EXIT(u64 insn);
+    void EXIT();
     void F2F_reg(u64 insn);
     void F2F_cbuf(u64 insn);
     void F2F_imm(u64 insn);
@@ -220,7 +220,7 @@ public:
     void JCAL(u64 insn);
     void JMP(u64 insn);
     void JMX(u64 insn);
-    void KIL(u64 insn);
+    void KIL();
     void LD(u64 insn);
     void LDC(u64 insn);
     void LDG(u64 insn);
```
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4..2922145ee 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp | |||
| @@ -11,6 +11,13 @@ | |||
| 11 | 11 | ||
| 12 | namespace Shader::Maxwell { | 12 | namespace Shader::Maxwell { |
| 13 | namespace { | 13 | namespace { |
| 14 | enum class Size : u64 { | ||
| 15 | B32, | ||
| 16 | B64, | ||
| 17 | B96, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 14 | enum class InterpolationMode : u64 { | 21 | enum class InterpolationMode : u64 { |
| 15 | Pass, | 22 | Pass, |
| 16 | Multiply, | 23 | Multiply, |
| @@ -23,8 +30,85 @@ enum class SampleMode : u64 { | |||
| 23 | Centroid, | 30 | Centroid, |
| 24 | Offset, | 31 | Offset, |
| 25 | }; | 32 | }; |
| 33 | |||
| 34 | int NumElements(Size size) { | ||
| 35 | switch (size) { | ||
| 36 | case Size::B32: | ||
| 37 | return 1; | ||
| 38 | case Size::B64: | ||
| 39 | return 2; | ||
| 40 | case Size::B96: | ||
| 41 | return 3; | ||
| 42 | case Size::B128: | ||
| 43 | return 4; | ||
| 44 | } | ||
| 45 | throw InvalidArgument("Invalid size {}", size); | ||
| 46 | } | ||
| 26 | } // Anonymous namespace | 47 | } // Anonymous namespace |
| 27 | 48 | ||
| 49 | void TranslatorVisitor::ALD(u64 insn) { | ||
| 50 | union { | ||
| 51 | u64 raw; | ||
| 52 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 53 | BitField<8, 8, IR::Reg> index_reg; | ||
| 54 | BitField<20, 10, u64> absolute_offset; | ||
| 55 | BitField<20, 11, s64> relative_offset; | ||
| 56 | BitField<39, 8, IR::Reg> stream_reg; | ||
| 57 | BitField<32, 1, u64> o; | ||
| 58 | BitField<31, 1, u64> patch; | ||
| 59 | BitField<47, 2, Size> size; | ||
| 60 | } const ald{insn}; | ||
| 61 | |||
| 62 | if (ald.o != 0) { | ||
| 63 | throw NotImplementedException("O"); | ||
| 64 | } | ||
| 65 | if (ald.patch != 0) { | ||
| 66 | throw NotImplementedException("P"); | ||
| 67 | } | ||
| 68 | if (ald.index_reg != IR::Reg::RZ) { | ||
| 69 | throw NotImplementedException("Indexed"); | ||
| 70 | } | ||
| 71 | const u64 offset{ald.absolute_offset.Value()}; | ||
| 72 | if (offset % 4 != 0) { | ||
| 73 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 74 | } | ||
| 75 | const int num_elements{NumElements(ald.size)}; | ||
| 76 | for (int element = 0; element < num_elements; ++element) { | ||
| 77 | F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void TranslatorVisitor::AST(u64 insn) { | ||
| 82 | union { | ||
| 83 | u64 raw; | ||
| 84 | BitField<0, 8, IR::Reg> src_reg; | ||
| 85 | BitField<8, 8, IR::Reg> index_reg; | ||
| 86 | BitField<20, 10, u64> absolute_offset; | ||
| 87 | BitField<20, 11, s64> relative_offset; | ||
| 88 | BitField<31, 1, u64> patch; | ||
| 89 | BitField<39, 8, IR::Reg> stream_reg; | ||
| 90 | BitField<47, 2, Size> size; | ||
| 91 | } const ast{insn}; | ||
| 92 | |||
| 93 | if (ast.patch != 0) { | ||
| 94 | throw NotImplementedException("P"); | ||
| 95 | } | ||
| 96 | if (ast.stream_reg != IR::Reg::RZ) { | ||
| 97 | throw NotImplementedException("Stream store"); | ||
| 98 | } | ||
| 99 | if (ast.index_reg != IR::Reg::RZ) { | ||
| 100 | throw NotImplementedException("Indexed store"); | ||
| 101 | } | ||
| 102 | const u64 offset{ast.absolute_offset.Value()}; | ||
| 103 | if (offset % 4 != 0) { | ||
| 104 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 105 | } | ||
| 106 | const int num_elements{NumElements(ast.size)}; | ||
| 107 | for (int element = 0; element < num_elements; ++element) { | ||
| 108 | ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 28 | void TranslatorVisitor::IPA(u64 insn) { | 112 | void TranslatorVisitor::IPA(u64 insn) { |
| 29 | // IPA is the instruction used to read varyings in a fragment shader. | 113 | // IPA is the instruction used to read varyings in a fragment shader. |
| 30 | // gl_FragCoord is mapped to the gl_Position attribute. | 114 | // gl_FragCoord is mapped to the gl_Position attribute. |
| @@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { | |||
| 51 | // } | 135 | // } |
| 52 | const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; | 136 | const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; |
| 53 | if (is_indexed) { | 137 | if (is_indexed) { |
| 54 | throw NotImplementedException("IPA.IDX"); | 138 | throw NotImplementedException("IDX"); |
| 55 | } | 139 | } |
| 56 | 140 | ||
| 57 | const IR::Attribute attribute{ipa.attribute}; | 141 | const IR::Attribute attribute{ipa.attribute}; |
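Concretely, the ALD decode above expands one wide attribute load into consecutive register writes. A standalone rehearsal of the offset arithmetic (example operand values only, not emulator code):

    #include <cstdint>
    #include <cstdio>

    // A B128 load at byte offset 0x80: NumElements(Size::B128) == 4 and
    // 0x80 / 4 == 32, so R0..R3 receive attribute words 32..35.
    int main() {
        const std::uint64_t absolute_offset = 0x80; // BitField<20, 10, u64> value
        const int num_elements = 4;                 // NumElements(Size::B128)
        for (int element = 0; element < num_elements; ++element) {
            std::printf("R%d <- attribute word %llu\n", element,
                        static_cast<unsigned long long>(absolute_offset / 4 + element));
        }
    }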
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef54..59252bcc5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { | |||
| 17 | ThrowNotImplemented(Opcode::AL2P); | 17 | ThrowNotImplemented(Opcode::AL2P); |
| 18 | } | 18 | } |
| 19 | 19 | ||
| 20 | void TranslatorVisitor::ALD(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ALD); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::AST(u64) { | ||
| 25 | ThrowNotImplemented(Opcode::AST); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::ATOM_cas(u64) { | 20 | void TranslatorVisitor::ATOM_cas(u64) { |
| 29 | ThrowNotImplemented(Opcode::ATOM_cas); | 21 | ThrowNotImplemented(Opcode::ATOM_cas); |
| 30 | } | 22 | } |
| @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { | |||
| 153 | ThrowNotImplemented(Opcode::DSETP_imm); | 145 | ThrowNotImplemented(Opcode::DSETP_imm); |
| 154 | } | 146 | } |
| 155 | 147 | ||
| 156 | void TranslatorVisitor::EXIT(u64) { | ||
| 157 | throw LogicError("Visting EXIT instruction"); | ||
| 158 | } | ||
| 159 | |||
| 160 | void TranslatorVisitor::F2F_reg(u64) { | 148 | void TranslatorVisitor::F2F_reg(u64) { |
| 161 | ThrowNotImplemented(Opcode::F2F_reg); | 149 | ThrowNotImplemented(Opcode::F2F_reg); |
| 162 | } | 150 | } |
| @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { | |||
| 345 | ThrowNotImplemented(Opcode::JMX); | 333 | ThrowNotImplemented(Opcode::JMX); |
| 346 | } | 334 | } |
| 347 | 335 | ||
| 348 | void TranslatorVisitor::KIL(u64) { | 336 | void TranslatorVisitor::KIL() { |
| 349 | ThrowNotImplemented(Opcode::KIL); | 337 | // KIL is a no-op |
| 350 | } | 338 | } |
| 351 | 339 | ||
| 352 | void TranslatorVisitor::LD(u64) { | 340 | void TranslatorVisitor::LD(u64) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c64..0fbb87ec4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { | |||
| 215 | BitField<36, 13, u64> cbuf_offset; | 215 | BitField<36, 13, u64> cbuf_offset; |
| 216 | } const tex{insn}; | 216 | } const tex{insn}; |
| 217 | 217 | ||
| 218 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset)); | 218 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); |
| 219 | } | 219 | } |
| 220 | 220 | ||
| 221 | void TranslatorVisitor::TEX_b(u64 insn) { | 221 | void TranslatorVisitor::TEX_b(u64 insn) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b00..54f0df754 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | |||
| @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { | |||
| 70 | 70 | ||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { |
| 72 | const Encoding texs{insn}; | 72 | const Encoding texs{insn}; |
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))}; | 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; |
| 74 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | 74 | const IR::F32 zero{v.ir.Imm32(0.0f)}; |
| 75 | const IR::Reg reg_a{texs.src_reg_a}; | 75 | const IR::Reg reg_a{texs.src_reg_a}; |
| 76 | const IR::Reg reg_b{texs.src_reg_b}; | 76 | const IR::Reg reg_b{texs.src_reg_b}; |
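The `* 4` added in both the TEX and TEXS paths reads as a word-to-byte conversion: the 13-bit cbuf_offset field evidently indexes 32-bit constant-buffer words, while the handle fetch expects a byte offset. A tiny self-contained check of that arithmetic (the word-granularity interpretation is inferred from the change, not stated in it):

    #include <cstdint>

    constexpr std::uint32_t cbuf_word = 8; // sample tex.cbuf_offset value
    constexpr std::uint32_t cbuf_byte = cbuf_word * 4;
    static_assert(cbuf_byte == 32, "a handle at word 8 sits at byte offset 32");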
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 708b6b267..fbbe28632 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { | |||
| 17 | return; | 17 | return; |
| 18 | } | 18 | } |
| 19 | info.constant_buffer_mask |= 1U << index; | 19 | info.constant_buffer_mask |= 1U << index; |
| 20 | info.constant_buffer_descriptors.push_back({ | 20 | |
| 21 | .index{index}, | 21 | auto& cbufs{info.constant_buffer_descriptors}; |
| 22 | .count{1}, | 22 | cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), |
| 23 | }); | 23 | ConstantBufferDescriptor{ |
| 24 | .index{index}, | ||
| 25 | .count{1}, | ||
| 26 | }); | ||
| 27 | } | ||
| 28 | |||
| 29 | void GetAttribute(Info& info, IR::Attribute attribute) { | ||
| 30 | if (IR::IsGeneric(attribute)) { | ||
| 31 | info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; | ||
| 32 | return; | ||
| 33 | } | ||
| 34 | switch (attribute) { | ||
| 35 | case IR::Attribute::PositionX: | ||
| 36 | case IR::Attribute::PositionY: | ||
| 37 | case IR::Attribute::PositionZ: | ||
| 38 | case IR::Attribute::PositionW: | ||
| 39 | info.loads_position = true; | ||
| 40 | break; | ||
| 41 | default: | ||
| 42 | throw NotImplementedException("Get attribute {}", attribute); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | void SetAttribute(Info& info, IR::Attribute attribute) { | ||
| 47 | if (IR::IsGeneric(attribute)) { | ||
| 48 | info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; | ||
| 49 | return; | ||
| 50 | } | ||
| 51 | switch (attribute) { | ||
| 52 | case IR::Attribute::PositionX: | ||
| 53 | case IR::Attribute::PositionY: | ||
| 54 | case IR::Attribute::PositionZ: | ||
| 55 | case IR::Attribute::PositionW: | ||
| 56 | info.stores_position = true; | ||
| 57 | break; | ||
| 58 | default: | ||
| 59 | throw NotImplementedException("Set attribute {}", attribute); | ||
| 60 | } | ||
| 24 | } | 61 | } |
| 25 | 62 | ||
| 26 | void VisitUsages(Info& info, IR::Inst& inst) { | 63 | void VisitUsages(Info& info, IR::Inst& inst) { |
| @@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 162 | break; | 199 | break; |
| 163 | } | 200 | } |
| 164 | switch (inst.Opcode()) { | 201 | switch (inst.Opcode()) { |
| 202 | case IR::Opcode::DemoteToHelperInvocation: | ||
| 203 | info.uses_demote_to_helper_invocation = true; | ||
| 204 | break; | ||
| 205 | case IR::Opcode::GetAttribute: | ||
| 206 | GetAttribute(info, inst.Arg(0).Attribute()); | ||
| 207 | break; | ||
| 208 | case IR::Opcode::SetAttribute: | ||
| 209 | SetAttribute(info, inst.Arg(0).Attribute()); | ||
| 210 | break; | ||
| 211 | case IR::Opcode::SetFragColor: | ||
| 212 | info.stores_frag_color[inst.Arg(0).U32()] = true; | ||
| 213 | break; | ||
| 214 | case IR::Opcode::SetFragDepth: | ||
| 215 | info.stores_frag_depth = true; | ||
| 216 | break; | ||
| 165 | case IR::Opcode::WorkgroupId: | 217 | case IR::Opcode::WorkgroupId: |
| 166 | info.uses_workgroup_id = true; | 218 | info.uses_workgroup_id = true; |
| 167 | break; | 219 | break; |
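The constant-buffer descriptor list is now kept sorted by index at insertion time instead of being appended to. The same keep-sorted pattern in isolation (toy Desc type for illustration):

    #include <algorithm>
    #include <vector>

    struct Desc {
        unsigned index;
        unsigned count;
    };

    // std::ranges::lower_bound with a member projection finds the first
    // element whose index is not less than the new one; inserting there
    // preserves ascending order without a separate sort pass.
    void InsertSorted(std::vector<Desc>& descs, unsigned index) {
        descs.insert(std::ranges::lower_bound(descs, index, {}, &Desc::index),
                     Desc{.index = index, .count = 1});
    }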
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index d09bcec36..bab7ca186 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | |||
| @@ -169,7 +169,7 @@ private: | |||
| 169 | const size_t num_args{phi.NumArgs()}; | 169 | const size_t num_args{phi.NumArgs()}; |
| 170 | for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { | 170 | for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { |
| 171 | const IR::Value& op{phi.Arg(arg_index)}; | 171 | const IR::Value& op{phi.Arg(arg_index)}; |
| 172 | if (op == same || op == IR::Value{&phi}) { | 172 | if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { |
| 173 | // Unique value or self-reference | 173 | // Unique value or self-reference |
| 174 | continue; | 174 | continue; |
| 175 | } | 175 | } |
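The one-line SSA fix compares resolved values so that chains of identity instructions no longer defeat trivial-phi elimination. A toy model of the behavior Resolve() is assumed to have, namely following Identity forwarding until the underlying value is reached:

    // Toy stand-in for IR::Value; illustration only.
    struct Value {
        const Value* identity_of = nullptr; // non-null when this value is an Identity
        int id = 0;                         // stand-in for the real payload

        const Value& Resolve() const {
            const Value* value = this;
            while (value->identity_of != nullptr) {
                value = value->identity_of; // peel the identity chain
            }
            return *value;
        }
    };
    // Two aliases of one definition now compare equal:
    //   Value base{.id = 7};
    //   Value alias{.identity_of = &base};
    //   alias.Resolve().id == base.Resolve().id  // true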
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..1544bfa42 --- /dev/null +++ b/src/shader_recompiler/program_header.h | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | |||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Shader { | ||
| 15 | |||
| 16 | enum class OutputTopology : u32 { | ||
| 17 | PointList = 1, | ||
| 18 | LineStrip = 6, | ||
| 19 | TriangleStrip = 7, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class PixelImap : u8 { | ||
| 23 | Unused = 0, | ||
| 24 | Constant = 1, | ||
| 25 | Perspective = 2, | ||
| 26 | ScreenLinear = 3, | ||
| 27 | }; | ||
| 28 | |||
| 29 | // Documentation in: | ||
| 30 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html | ||
| 31 | struct ProgramHeader { | ||
| 32 | union { | ||
| 33 | BitField<0, 5, u32> sph_type; | ||
| 34 | BitField<5, 5, u32> version; | ||
| 35 | BitField<10, 4, u32> shader_type; | ||
| 36 | BitField<14, 1, u32> mrt_enable; | ||
| 37 | BitField<15, 1, u32> kills_pixels; | ||
| 38 | BitField<16, 1, u32> does_global_store; | ||
| 39 | BitField<17, 4, u32> sass_version; | ||
| 40 | BitField<21, 5, u32> reserved; | ||
| 41 | BitField<26, 1, u32> does_load_or_store; | ||
| 42 | BitField<27, 1, u32> does_fp64; | ||
| 43 | BitField<28, 4, u32> stream_out_mask; | ||
| 44 | } common0; | ||
| 45 | |||
| 46 | union { | ||
| 47 | BitField<0, 24, u32> shader_local_memory_low_size; | ||
| 48 | BitField<24, 8, u32> per_patch_attribute_count; | ||
| 49 | } common1; | ||
| 50 | |||
| 51 | union { | ||
| 52 | BitField<0, 24, u32> shader_local_memory_high_size; | ||
| 53 | BitField<24, 8, u32> threads_per_input_primitive; | ||
| 54 | } common2; | ||
| 55 | |||
| 56 | union { | ||
| 57 | BitField<0, 24, u32> shader_local_memory_crs_size; | ||
| 58 | BitField<24, 4, OutputTopology> output_topology; | ||
| 59 | BitField<28, 4, u32> reserved; | ||
| 60 | } common3; | ||
| 61 | |||
| 62 | union { | ||
| 63 | BitField<0, 12, u32> max_output_vertices; | ||
| 64 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||
| 65 | BitField<20, 4, u32> reserved; | ||
| 66 | BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||
| 67 | } common4; | ||
| 68 | |||
| 69 | union { | ||
| 70 | struct { | ||
| 71 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 72 | INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB | ||
| 73 | INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] | ||
| 74 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 75 | union { | ||
| 76 | BitField<0, 8, u16> clip_distances; | ||
| 77 | BitField<8, 1, u16> point_sprite_s; | ||
| 78 | BitField<9, 1, u16> point_sprite_t; | ||
| 79 | BitField<10, 1, u16> fog_coordinate; | ||
| 80 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 81 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 82 | BitField<14, 1, u16> instance_id; | ||
| 83 | BitField<15, 1, u16> vertex_id; | ||
| 84 | }; | ||
| 85 | INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] | ||
| 86 | INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved | ||
| 87 | INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA | ||
| 88 | INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB | ||
| 89 | INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] | ||
| 90 | INSERT_PADDING_BYTES_NOINIT(2); // OmapColor | ||
| 91 | INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC | ||
| 92 | INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] | ||
| 93 | INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved | ||
| 94 | } vtg; | ||
| 95 | |||
| 96 | struct { | ||
| 97 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 98 | INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB | ||
| 99 | |||
| 100 | union { | ||
| 101 | BitField<0, 2, PixelImap> x; | ||
| 102 | BitField<2, 2, PixelImap> y; | ||
| 103 | BitField<4, 2, PixelImap> z; | ||
| 104 | BitField<6, 2, PixelImap> w; | ||
| 105 | u8 raw; | ||
| 106 | } imap_generic_vector[32]; | ||
| 107 | |||
| 108 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 109 | INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC | ||
| 110 | INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] | ||
| 111 | INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved | ||
| 112 | |||
| 113 | struct { | ||
| 114 | u32 target; | ||
| 115 | union { | ||
| 116 | BitField<0, 1, u32> sample_mask; | ||
| 117 | BitField<1, 1, u32> depth; | ||
| 118 | BitField<2, 30, u32> reserved; | ||
| 119 | }; | ||
| 120 | } omap; | ||
| 121 | |||
| 122 | [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept { | ||
| 123 | const u32 bits{omap.target >> (rt * 4)}; | ||
| 124 | return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; | ||
| 125 | } | ||
| 126 | |||
| 127 | [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { | ||
| 128 | const auto& vector{imap_generic_vector[attribute]}; | ||
| 129 | return {vector.x, vector.y, vector.z, vector.w}; | ||
| 130 | } | ||
| 131 | } ps; | ||
| 132 | |||
| 133 | std::array<u32, 0xf> raw; | ||
| 134 | }; | ||
| 135 | |||
| 136 | [[nodiscard]] u64 LocalMemorySize() const noexcept { | ||
| 137 | return (common1.shader_local_memory_low_size | | ||
| 138 | (static_cast&lt;u64&gt;(common2.shader_local_memory_high_size) &lt;&lt; 24)); | ||
| 139 | } | ||
| 140 | }; | ||
| 141 | static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); | ||
| 142 | |||
| 143 | } // namespace Shader | ||
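In the omap block, target packs one 4-bit RGBA write mask per render target, least significant bit first, which is all EnabledOutputComponents unpacks. The same logic as a free function, plus a worked value:

    #include <array>
    #include <cstdint>

    // Mirrors ProgramHeader::EnabledOutputComponents above.
    std::array<bool, 4> EnabledComponents(std::uint32_t target, std::uint32_t rt) {
        const std::uint32_t bits = target >> (rt * 4);
        return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
    }
    // Example: target == 0x00F0 -> render target 0 writes nothing,
    // render target 1 writes all four components.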
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp deleted file mode 100644 index 527e19c27..000000000 --- a/src/shader_recompiler/recompiler.cpp +++ /dev/null | |||
| @@ -1,28 +0,0 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 9 | #include "shader_recompiler/environment.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/program.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | #include "shader_recompiler/recompiler.h" | ||
| 14 | |||
| 15 | namespace Shader { | ||
| 16 | |||
| 17 | std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env, | ||
| 18 | u32 start_address) { | ||
| 19 | ObjectPool<Maxwell::Flow::Block> flow_block_pool; | ||
| 20 | ObjectPool<IR::Inst> inst_pool; | ||
| 21 | ObjectPool<IR::Block> block_pool; | ||
| 22 | |||
| 23 | Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; | ||
| 24 | IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; | ||
| 25 | return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h deleted file mode 100644 index 2529463ae..000000000 --- a/src/shader_recompiler/recompiler.h +++ /dev/null | |||
| @@ -1,20 +0,0 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "shader_recompiler/environment.h" | ||
| 12 | #include "shader_recompiler/profile.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | |||
| 15 | namespace Shader { | ||
| 16 | |||
| 17 | [[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, | ||
| 18 | Environment& env, u32 start_address); | ||
| 19 | |||
| 20 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index adc1d9a64..6eff762e2 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -56,6 +56,15 @@ struct Info { | |||
| 56 | 56 | ||
| 57 | bool uses_workgroup_id{}; | 57 | bool uses_workgroup_id{}; |
| 58 | bool uses_local_invocation_id{}; | 58 | bool uses_local_invocation_id{}; |
| 59 | |||
| 60 | std::array<bool, 32> loads_generics{}; | ||
| 61 | bool loads_position{}; | ||
| 62 | |||
| 63 | std::array<bool, 8> stores_frag_color{}; | ||
| 64 | bool stores_frag_depth{}; | ||
| 65 | std::array<bool, 32> stores_generics{}; | ||
| 66 | bool stores_position{}; | ||
| 67 | |||
| 59 | bool uses_fp16{}; | 68 | bool uses_fp16{}; |
| 60 | bool uses_fp64{}; | 69 | bool uses_fp64{}; |
| 61 | bool uses_fp16_denorms_flush{}; | 70 | bool uses_fp16_denorms_flush{}; |
| @@ -68,6 +77,7 @@ struct Info { | |||
| 68 | bool uses_image_1d{}; | 77 | bool uses_image_1d{}; |
| 69 | bool uses_sampled_1d{}; | 78 | bool uses_sampled_1d{}; |
| 70 | bool uses_sparse_residency{}; | 79 | bool uses_sparse_residency{}; |
| 80 | bool uses_demote_to_helper_invocation{}; | ||
| 71 | 81 | ||
| 72 | IR::Type used_constant_buffer_types{}; | 82 | IR::Type used_constant_buffer_types{}; |
| 73 | 83 | ||
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..fc6ce6043 --- /dev/null +++ b/src/shader_recompiler/stage.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Shader { | ||
| 8 | |||
| 9 | enum class Stage { | ||
| 10 | Compute, | ||
| 11 | VertexA, | ||
| 12 | VertexB, | ||
| 13 | TessellationControl, | ||
| 14 | TessellationEval, | ||
| 15 | Geometry, | ||
| 16 | Fragment, | ||
| 17 | }; | ||
| 18 | |||
| 19 | } // namespace Shader | ||
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3323e6916..71b07c194 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -100,6 +100,7 @@ add_library(video_core STATIC | |||
| 100 | renderer_vulkan/fixed_pipeline_state.h | 100 | renderer_vulkan/fixed_pipeline_state.h |
| 101 | renderer_vulkan/maxwell_to_vk.cpp | 101 | renderer_vulkan/maxwell_to_vk.cpp |
| 102 | renderer_vulkan/maxwell_to_vk.h | 102 | renderer_vulkan/maxwell_to_vk.h |
| 103 | renderer_vulkan/pipeline_helper.h | ||
| 103 | renderer_vulkan/renderer_vulkan.h | 104 | renderer_vulkan/renderer_vulkan.h |
| 104 | renderer_vulkan/renderer_vulkan.cpp | 105 | renderer_vulkan/renderer_vulkan.cpp |
| 105 | renderer_vulkan/vk_blit_screen.cpp | 106 | renderer_vulkan/vk_blit_screen.cpp |
| @@ -116,15 +117,18 @@ add_library(video_core STATIC | |||
| 116 | renderer_vulkan/vk_descriptor_pool.h | 117 | renderer_vulkan/vk_descriptor_pool.h |
| 117 | renderer_vulkan/vk_fence_manager.cpp | 118 | renderer_vulkan/vk_fence_manager.cpp |
| 118 | renderer_vulkan/vk_fence_manager.h | 119 | renderer_vulkan/vk_fence_manager.h |
| 120 | renderer_vulkan/vk_graphics_pipeline.cpp | ||
| 121 | renderer_vulkan/vk_graphics_pipeline.h | ||
| 119 | renderer_vulkan/vk_master_semaphore.cpp | 122 | renderer_vulkan/vk_master_semaphore.cpp |
| 120 | renderer_vulkan/vk_master_semaphore.h | 123 | renderer_vulkan/vk_master_semaphore.h |
| 121 | renderer_vulkan/vk_pipeline_cache.cpp | 124 | renderer_vulkan/vk_pipeline_cache.cpp |
| 122 | renderer_vulkan/vk_pipeline_cache.h | 125 | renderer_vulkan/vk_pipeline_cache.h |
| 123 | renderer_vulkan/vk_pipeline.h | ||
| 124 | renderer_vulkan/vk_query_cache.cpp | 126 | renderer_vulkan/vk_query_cache.cpp |
| 125 | renderer_vulkan/vk_query_cache.h | 127 | renderer_vulkan/vk_query_cache.h |
| 126 | renderer_vulkan/vk_rasterizer.cpp | 128 | renderer_vulkan/vk_rasterizer.cpp |
| 127 | renderer_vulkan/vk_rasterizer.h | 129 | renderer_vulkan/vk_rasterizer.h |
| 130 | renderer_vulkan/vk_render_pass_cache.cpp | ||
| 131 | renderer_vulkan/vk_render_pass_cache.h | ||
| 128 | renderer_vulkan/vk_resource_pool.cpp | 132 | renderer_vulkan/vk_resource_pool.cpp |
| 129 | renderer_vulkan/vk_resource_pool.h | 133 | renderer_vulkan/vk_resource_pool.h |
| 130 | renderer_vulkan/vk_scheduler.cpp | 134 | renderer_vulkan/vk_scheduler.cpp |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d8f683907 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 72 | regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; | 72 | regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; |
| 73 | alpha_test_func.Assign(PackComparisonOp(test_func)); | 73 | alpha_test_func.Assign(PackComparisonOp(test_func)); |
| 74 | early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); | 74 | early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); |
| 75 | depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); | ||
| 76 | depth_format.Assign(static_cast<u32>(regs.zeta.format)); | ||
| 77 | std::ranges::transform(regs.rt, color_formats.begin(), | ||
| 78 | [](const auto& rt) { return static_cast<u8>(rt.format); }); | ||
| 75 | 79 | ||
| 76 | alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); | 80 | alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); |
| 77 | point_size = Common::BitCast<u32>(regs.point_size); | 81 | point_size = Common::BitCast<u32>(regs.point_size); |
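Refresh now folds the attachment formats into the pipeline key: zeta.format lands in a 5-bit field and each render-target format is narrowed to a byte. MakeRenderPassKey in vk_graphics_pipeline.cpp (further down in this commit) widens them back, so the pair behaves as a pack/unpack round trip; sketched in isolation:

    #include <cstdint>

    enum class RenderTargetFormat : std::uint32_t { NONE = 0 /* ... */ };

    // Pack as in FixedPipelineState::Refresh, unpack as in MakeRenderPassKey.
    std::uint8_t Pack(RenderTargetFormat format) {
        return static_cast<std::uint8_t>(format);
    }
    RenderTargetFormat Unpack(std::uint8_t packed) {
        return static_cast<RenderTargetFormat>(packed);
    }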
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..348f1d6ce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -60,7 +60,7 @@ struct FixedPipelineState { | |||
| 60 | 60 | ||
| 61 | void Refresh(const Maxwell& regs, size_t index); | 61 | void Refresh(const Maxwell& regs, size_t index); |
| 62 | 62 | ||
| 63 | constexpr std::array<bool, 4> Mask() const noexcept { | 63 | std::array<bool, 4> Mask() const noexcept { |
| 64 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; | 64 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| @@ -97,11 +97,11 @@ struct FixedPipelineState { | |||
| 97 | BitField<20, 3, u32> type; | 97 | BitField<20, 3, u32> type; |
| 98 | BitField<23, 6, u32> size; | 98 | BitField<23, 6, u32> size; |
| 99 | 99 | ||
| 100 | constexpr Maxwell::VertexAttribute::Type Type() const noexcept { | 100 | Maxwell::VertexAttribute::Type Type() const noexcept { |
| 101 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); | 101 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | constexpr Maxwell::VertexAttribute::Size Size() const noexcept { | 104 | Maxwell::VertexAttribute::Size Size() const noexcept { |
| 105 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); | 105 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); |
| 106 | } | 106 | } |
| 107 | }; | 107 | }; |
| @@ -187,7 +187,10 @@ struct FixedPipelineState { | |||
| 187 | u32 raw2; | 187 | u32 raw2; |
| 188 | BitField<0, 3, u32> alpha_test_func; | 188 | BitField<0, 3, u32> alpha_test_func; |
| 189 | BitField<3, 1, u32> early_z; | 189 | BitField<3, 1, u32> early_z; |
| 190 | BitField<4, 1, u32> depth_enabled; | ||
| 191 | BitField<5, 5, u32> depth_format; | ||
| 190 | }; | 192 | }; |
| 193 | std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||
| 191 | 194 | ||
| 192 | u32 alpha_test_ref; | 195 | u32 alpha_test_ref; |
| 193 | u32 point_size; | 196 | u32 point_size; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..dc4ff0da2 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti | |||
| 741 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | 741 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; |
| 742 | } | 742 | } |
| 743 | 743 | ||
| 744 | VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 745 | switch (msaa_mode) { | ||
| 746 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 747 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 748 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 749 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 750 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 751 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 752 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 753 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 754 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 755 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 756 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 757 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 758 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 759 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 760 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 761 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 762 | default: | ||
| 763 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 764 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 765 | } | ||
| 766 | } | ||
| 767 | |||
| 744 | } // namespace Vulkan::MaxwellToVK | 768 | } // namespace Vulkan::MaxwellToVK |
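MsaaMode collapses each Tegra MSAA layout to its total sample count (2x1 -> 2, 2x2 -> 4, 4x2 -> 8, 4x4 -> 16), since VkSampleCountFlagBits only expresses the count, not the layout. Its one call site in this commit is in MakeRenderPassKey:

    // From MakeRenderPassKey in vk_graphics_pipeline.cpp below:
    key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);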
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..9f78e15b6 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); | |||
| 71 | 71 | ||
| 72 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); | 72 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); |
| 73 | 73 | ||
| 74 | VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); | ||
| 75 | |||
| 74 | } // namespace Vulkan::MaxwellToVK | 76 | } // namespace Vulkan::MaxwellToVK |
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..0a59aa659 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h | |||
| @@ -0,0 +1,162 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | #include <boost/container/small_vector.hpp> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 16 | #include "video_core/texture_cache/texture_cache.h" | ||
| 17 | #include "video_core/texture_cache/types.h" | ||
| 18 | #include "video_core/textures/texture.h" | ||
| 19 | |||
| 20 | namespace Vulkan { | ||
| 21 | |||
| 22 | struct TextureHandle { | ||
| 23 | explicit TextureHandle(u32 data, bool via_header_index) { | ||
| 24 | if (via_header_index) [[likely]] { | ||
| 25 | image = data; | ||
| 26 | sampler = data; | ||
| 27 | } else { | ||
| 28 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 29 | image = handle.tic_id; | ||
| 30 | sampler = handle.tsc_id.Value(); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | u32 image; | ||
| 35 | u32 sampler; | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct DescriptorLayoutTuple { | ||
| 39 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 40 | vk::PipelineLayout pipeline_layout; | ||
| 41 | vk::DescriptorUpdateTemplateKHR descriptor_update_template; | ||
| 42 | }; | ||
| 43 | |||
| 44 | class DescriptorLayoutBuilder { | ||
| 45 | public: | ||
| 46 | DescriptorLayoutTuple Create(const vk::Device& device) { | ||
| 47 | DescriptorLayoutTuple result; | ||
| 48 | if (!bindings.empty()) { | ||
| 49 | result.descriptor_set_layout = device.CreateDescriptorSetLayout({ | ||
| 50 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 51 | .pNext = nullptr, | ||
| 52 | .flags = 0, | ||
| 53 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 54 | .pBindings = bindings.data(), | ||
| 55 | }); | ||
| 56 | } | ||
| 57 | result.pipeline_layout = device.CreatePipelineLayout({ | ||
| 58 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 59 | .pNext = nullptr, | ||
| 60 | .flags = 0, | ||
| 61 | .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, | ||
| 62 | .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), | ||
| 63 | .pushConstantRangeCount = 0, | ||
| 64 | .pPushConstantRanges = nullptr, | ||
| 65 | }); | ||
| 66 | if (!entries.empty()) { | ||
| 67 | result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ | ||
| 68 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 69 | .pNext = nullptr, | ||
| 70 | .flags = 0, | ||
| 71 | .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), | ||
| 72 | .pDescriptorUpdateEntries = entries.data(), | ||
| 73 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 74 | .descriptorSetLayout = *result.descriptor_set_layout, | ||
| 75 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 76 | .pipelineLayout = *result.pipeline_layout, | ||
| 77 | .set = 0, | ||
| 78 | }); | ||
| 79 | } | ||
| 80 | return result; | ||
| 81 | } | ||
| 82 | |||
| 83 | void Add(const Shader::Info& info, VkShaderStageFlags stage) { | ||
| 84 | for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { | ||
| 85 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); | ||
| 86 | } | ||
| 87 | for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { | ||
| 88 | Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); | ||
| 89 | } | ||
| 90 | for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { | ||
| 91 | Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | private: | ||
| 96 | void Add(VkDescriptorType type, VkShaderStageFlags stage) { | ||
| 97 | bindings.push_back({ | ||
| 98 | .binding = binding, | ||
| 99 | .descriptorType = type, | ||
| 100 | .descriptorCount = 1, | ||
| 101 | .stageFlags = stage, | ||
| 102 | .pImmutableSamplers = nullptr, | ||
| 103 | }); | ||
| 104 | entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ | ||
| 105 | .dstBinding = binding, | ||
| 106 | .dstArrayElement = 0, | ||
| 107 | .descriptorCount = 1, | ||
| 108 | .descriptorType = type, | ||
| 109 | .offset = offset, | ||
| 110 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 111 | }); | ||
| 112 | ++binding; | ||
| 113 | offset += sizeof(DescriptorUpdateEntry); | ||
| 114 | } | ||
| 115 | |||
| 116 | boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; | ||
| 117 | boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; | ||
| 118 | u32 binding{}; | ||
| 119 | size_t offset{}; | ||
| 120 | }; | ||
| 121 | |||
| 122 | inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { | ||
| 123 | switch (type) { | ||
| 124 | case Shader::TextureType::Color1D: | ||
| 125 | case Shader::TextureType::Shadow1D: | ||
| 126 | return VideoCommon::ImageViewType::e1D; | ||
| 127 | case Shader::TextureType::ColorArray1D: | ||
| 128 | case Shader::TextureType::ShadowArray1D: | ||
| 129 | return VideoCommon::ImageViewType::e1DArray; | ||
| 130 | case Shader::TextureType::Color2D: | ||
| 131 | case Shader::TextureType::Shadow2D: | ||
| 132 | return VideoCommon::ImageViewType::e2D; | ||
| 133 | case Shader::TextureType::ColorArray2D: | ||
| 134 | case Shader::TextureType::ShadowArray2D: | ||
| 135 | return VideoCommon::ImageViewType::e2DArray; | ||
| 136 | case Shader::TextureType::Color3D: | ||
| 137 | case Shader::TextureType::Shadow3D: | ||
| 138 | return VideoCommon::ImageViewType::e3D; | ||
| 139 | case Shader::TextureType::ColorCube: | ||
| 140 | case Shader::TextureType::ShadowCube: | ||
| 141 | return VideoCommon::ImageViewType::Cube; | ||
| 142 | case Shader::TextureType::ColorArrayCube: | ||
| 143 | case Shader::TextureType::ShadowArrayCube: | ||
| 144 | return VideoCommon::ImageViewType::CubeArray; | ||
| 145 | } | ||
| 146 | UNREACHABLE_MSG("Invalid texture type {}", type); | ||
| 147 | return {}; | ||
| 148 | } | ||
| 149 | |||
| 150 | inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, | ||
| 151 | const ImageId* image_view_ids, TextureCache& texture_cache, | ||
| 152 | VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { | ||
| 153 | for (const auto& desc : info.texture_descriptors) { | ||
| 154 | const VkSampler sampler{samplers[index]}; | ||
| 155 | ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; | ||
| 156 | const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; | ||
| 157 | update_descriptor_queue.AddSampledImage(vk_image_view, sampler); | ||
| 158 | ++index; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | |||
| 162 | } // namespace Vulkan | ||
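Taken together, the header gives pipelines a two-call recipe: feed each stage's Shader::Info into the builder, then create the set layout, pipeline layout, and update template in one go. A usage sketch matching the CreateLayout helpers in vk_compute_pipeline.cpp and vk_graphics_pipeline.cpp (the info and device names here are placeholders):

    DescriptorLayoutBuilder builder;
    builder.Add(vertex_info, VK_SHADER_STAGE_VERTEX_BIT);   // placeholder infos
    builder.Add(fragment_info, VK_SHADER_STAGE_FRAGMENT_BIT);
    DescriptorLayoutTuple tuple{builder.Create(device.GetLogical())};
    // tuple.descriptor_set_layout / pipeline_layout / descriptor_update_template
    // are then moved into the owning pipeline object.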
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ff..6684d37a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <boost/container/small_vector.hpp> | 7 | #include <boost/container/small_vector.hpp> |
| 8 | 8 | ||
| 9 | #include "video_core/renderer_vulkan/pipeline_helper.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 11 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| @@ -17,140 +18,10 @@ | |||
| 17 | 18 | ||
| 18 | namespace Vulkan { | 19 | namespace Vulkan { |
| 19 | namespace { | 20 | namespace { |
| 20 | vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { | 21 | DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { |
| 21 | boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings; | 22 | DescriptorLayoutBuilder builder; |
| 22 | u32 binding{}; | 23 | builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); |
| 23 | for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { | 24 | return builder.Create(device.GetLogical()); |
| 24 | bindings.push_back({ | ||
| 25 | .binding = binding, | ||
| 26 | .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, | ||
| 27 | .descriptorCount = 1, | ||
| 28 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 29 | .pImmutableSamplers = nullptr, | ||
| 30 | }); | ||
| 31 | ++binding; | ||
| 32 | } | ||
| 33 | for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { | ||
| 34 | bindings.push_back({ | ||
| 35 | .binding = binding, | ||
| 36 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 37 | .descriptorCount = 1, | ||
| 38 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 39 | .pImmutableSamplers = nullptr, | ||
| 40 | }); | ||
| 41 | ++binding; | ||
| 42 | } | ||
| 43 | for (const auto& desc : info.texture_descriptors) { | ||
| 44 | bindings.push_back({ | ||
| 45 | .binding = binding, | ||
| 46 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 47 | .descriptorCount = 1, | ||
| 48 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 49 | .pImmutableSamplers = nullptr, | ||
| 50 | }); | ||
| 51 | ++binding; | ||
| 52 | } | ||
| 53 | return device.GetLogical().CreateDescriptorSetLayout({ | ||
| 54 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 55 | .pNext = nullptr, | ||
| 56 | .flags = 0, | ||
| 57 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 58 | .pBindings = bindings.data(), | ||
| 59 | }); | ||
| 60 | } | ||
| 61 | |||
| 62 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | ||
| 63 | const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, | ||
| 64 | VkPipelineLayout pipeline_layout) { | ||
| 65 | boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries; | ||
| 66 | size_t offset{}; | ||
| 67 | u32 binding{}; | ||
| 68 | for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { | ||
| 69 | entries.push_back({ | ||
| 70 | .dstBinding = binding, | ||
| 71 | .dstArrayElement = 0, | ||
| 72 | .descriptorCount = 1, | ||
| 73 | .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, | ||
| 74 | .offset = offset, | ||
| 75 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 76 | }); | ||
| 77 | ++binding; | ||
| 78 | offset += sizeof(DescriptorUpdateEntry); | ||
| 79 | } | ||
| 80 | for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { | ||
| 81 | entries.push_back({ | ||
| 82 | .dstBinding = binding, | ||
| 83 | .dstArrayElement = 0, | ||
| 84 | .descriptorCount = 1, | ||
| 85 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 86 | .offset = offset, | ||
| 87 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 88 | }); | ||
| 89 | ++binding; | ||
| 90 | offset += sizeof(DescriptorUpdateEntry); | ||
| 91 | } | ||
| 92 | for (const auto& desc : info.texture_descriptors) { | ||
| 93 | entries.push_back({ | ||
| 94 | .dstBinding = binding, | ||
| 95 | .dstArrayElement = 0, | ||
| 96 | .descriptorCount = 1, | ||
| 97 | .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||
| 98 | .offset = offset, | ||
| 99 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 100 | }); | ||
| 101 | ++binding; | ||
| 102 | offset += sizeof(DescriptorUpdateEntry); | ||
| 103 | } | ||
| 104 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||
| 105 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, | ||
| 106 | .pNext = nullptr, | ||
| 107 | .flags = 0, | ||
| 108 | .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), | ||
| 109 | .pDescriptorUpdateEntries = entries.data(), | ||
| 110 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, | ||
| 111 | .descriptorSetLayout = descriptor_set_layout, | ||
| 112 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, | ||
| 113 | .pipelineLayout = pipeline_layout, | ||
| 114 | .set = 0, | ||
| 115 | }); | ||
| 116 | } | ||
| 117 | |||
| 118 | struct TextureHandle { | ||
| 119 | explicit TextureHandle(u32 data, bool via_header_index) { | ||
| 120 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 121 | image = handle.tic_id; | ||
| 122 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 123 | } | ||
| 124 | |||
| 125 | u32 image; | ||
| 126 | u32 sampler; | ||
| 127 | }; | ||
| 128 | |||
| 129 | VideoCommon::ImageViewType CastType(Shader::TextureType type) { | ||
| 130 | switch (type) { | ||
| 131 | case Shader::TextureType::Color1D: | ||
| 132 | case Shader::TextureType::Shadow1D: | ||
| 133 | return VideoCommon::ImageViewType::e1D; | ||
| 134 | case Shader::TextureType::ColorArray1D: | ||
| 135 | case Shader::TextureType::ShadowArray1D: | ||
| 136 | return VideoCommon::ImageViewType::e1DArray; | ||
| 137 | case Shader::TextureType::Color2D: | ||
| 138 | case Shader::TextureType::Shadow2D: | ||
| 139 | return VideoCommon::ImageViewType::e2D; | ||
| 140 | case Shader::TextureType::ColorArray2D: | ||
| 141 | case Shader::TextureType::ShadowArray2D: | ||
| 142 | return VideoCommon::ImageViewType::e2DArray; | ||
| 143 | case Shader::TextureType::Color3D: | ||
| 144 | case Shader::TextureType::Shadow3D: | ||
| 145 | return VideoCommon::ImageViewType::e3D; | ||
| 146 | case Shader::TextureType::ColorCube: | ||
| 147 | case Shader::TextureType::ShadowCube: | ||
| 148 | return VideoCommon::ImageViewType::Cube; | ||
| 149 | case Shader::TextureType::ColorArrayCube: | ||
| 150 | case Shader::TextureType::ShadowArrayCube: | ||
| 151 | return VideoCommon::ImageViewType::CubeArray; | ||
| 152 | } | ||
| 153 | UNREACHABLE_MSG("Invalid texture type {}", type); | ||
| 154 | } | 25 | } |
| 155 | } // Anonymous namespace | 26 | } // Anonymous namespace |
| 156 | 27 | ||
| @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip | |||
| 158 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 29 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 159 | const Shader::Info& info_, vk::ShaderModule spv_module_) | 30 | const Shader::Info& info_, vk::ShaderModule spv_module_) |
| 160 | : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, | 31 | : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, |
| 161 | spv_module(std::move(spv_module_)), | 32 | spv_module(std::move(spv_module_)) { |
| 162 | descriptor_set_layout(CreateDescriptorSetLayout(device, info)), | 33 | DescriptorLayoutTuple tuple{CreateLayout(device, info)}; |
| 163 | descriptor_allocator(descriptor_pool, *descriptor_set_layout), | 34 | descriptor_set_layout = std::move(tuple.descriptor_set_layout); |
| 164 | pipeline_layout{device.GetLogical().CreatePipelineLayout({ | 35 | pipeline_layout = std::move(tuple.pipeline_layout); |
| 165 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 36 | descriptor_update_template = std::move(tuple.descriptor_update_template); |
| 166 | .pNext = nullptr, | 37 | descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); |
| 167 | .flags = 0, | 38 | |
| 168 | .setLayoutCount = 1, | 39 | pipeline = device.GetLogical().CreateComputePipeline({ |
| 169 | .pSetLayouts = descriptor_set_layout.address(), | 40 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 170 | .pushConstantRangeCount = 0, | 41 | .pNext = nullptr, |
| 171 | .pPushConstantRanges = nullptr, | 42 | .flags = 0, |
| 172 | })}, | 43 | .stage{ |
| 173 | descriptor_update_template{ | 44 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 174 | CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, | 45 | .pNext = nullptr, |
| 175 | pipeline{device.GetLogical().CreateComputePipeline({ | 46 | .flags = 0, |
| 176 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 47 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 177 | .pNext = nullptr, | 48 | .module = *spv_module, |
| 178 | .flags = 0, | 49 | .pName = "main", |
| 179 | .stage{ | 50 | .pSpecializationInfo = nullptr, |
| 180 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 51 | }, |
| 181 | .pNext = nullptr, | 52 | .layout = *pipeline_layout, |
| 182 | .flags = 0, | 53 | .basePipelineHandle = 0, |
| 183 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 54 | .basePipelineIndex = 0, |
| 184 | .module = *spv_module, | 55 | }); |
| 185 | .pName = "main", | 56 | } |
| 186 | .pSpecializationInfo = nullptr, | ||
| 187 | }, | ||
| 188 | .layout = *pipeline_layout, | ||
| 189 | .basePipelineHandle = 0, | ||
| 190 | .basePipelineIndex = 0, | ||
| 191 | })} {} | ||
| 192 | 57 | ||
| 193 | void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { | 58 | void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { |
| 194 | buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); | 59 | buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); |
| @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple | |||
| 211 | static constexpr size_t max_elements = 64; | 76 | static constexpr size_t max_elements = 64; |
| 212 | std::array<ImageId, max_elements> image_view_ids; | 77 | std::array<ImageId, max_elements> image_view_ids; |
| 213 | boost::container::static_vector<u32, max_elements> image_view_indices; | 78 | boost::container::static_vector<u32, max_elements> image_view_indices; |
| 214 | boost::container::static_vector<VkSampler, max_elements> sampler_handles; | 79 | boost::container::static_vector<VkSampler, max_elements> samplers; |
| 215 | 80 | ||
| 216 | const auto& launch_desc{kepler_compute.launch_description}; | 81 | const auto& launch_desc{kepler_compute.launch_description}; |
| 217 | const auto& cbufs{launch_desc.const_buffer_config}; | 82 | const auto& cbufs{launch_desc.const_buffer_config}; |
| @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple | |||
| 228 | image_view_indices.push_back(handle.image); | 93 | image_view_indices.push_back(handle.image); |
| 229 | 94 | ||
| 230 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | 95 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); |
| 231 | sampler_handles.push_back(sampler->Handle()); | 96 | samplers.push_back(sampler->Handle()); |
| 232 | } | 97 | } |
| 233 | |||
| 234 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 98 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 235 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | 99 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); |
| 236 | 100 | ||
| 237 | size_t index{}; | 101 | size_t index{}; |
| 238 | for (const auto& desc : info.texture_descriptors) { | 102 | PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, |
| 239 | const VkSampler vk_sampler{sampler_handles[index]}; | 103 | *update_descriptor_queue, index); |
| 240 | ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; | ||
| 241 | const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; | ||
| 242 | update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); | ||
| 243 | ++index; | ||
| 244 | } | ||
| 245 | } | 104 | } |
| 246 | 105 | ||
| 247 | VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { | 106 | VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4..e82e5816b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include "video_core/memory_manager.h" | 9 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 12 | #include "video_core/renderer_vulkan/vk_pipeline.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 12 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 14 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 13 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -18,7 +17,7 @@ namespace Vulkan { | |||
| 18 | 17 | ||
| 19 | class Device; | 18 | class Device; |
| 20 | 19 | ||
| 21 | class ComputePipeline : public Pipeline { | 20 | class ComputePipeline { |
| 22 | public: | 21 | public: |
| 23 | explicit ComputePipeline() = default; | 22 | explicit ComputePipeline() = default; |
| 24 | explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, | 23 | explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..a2ec418b1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -0,0 +1,445 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <span> | ||
| 7 | |||
| 8 | #include <boost/container/small_vector.hpp> | ||
| 9 | #include <boost/container/static_vector.hpp> | ||
| 10 | |||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 13 | #include "video_core/renderer_vulkan/pipeline_helper.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 21 | |||
| 22 | namespace Vulkan { | ||
| 23 | namespace { | ||
| 24 | using boost::container::small_vector; | ||
| 25 | using boost::container::static_vector; | ||
| 26 | using VideoCore::Surface::PixelFormat; | ||
| 27 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 28 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 29 | |||
| 30 | DescriptorLayoutTuple CreateLayout(const Device& device, std::span<const Shader::Info> infos) { | ||
| 31 | DescriptorLayoutBuilder builder; | ||
| 32 | for (size_t index = 0; index < infos.size(); ++index) { | ||
| 33 | static constexpr std::array stages{ | ||
| 34 | VK_SHADER_STAGE_VERTEX_BIT, | ||
| 35 | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, | ||
| 36 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, | ||
| 37 | VK_SHADER_STAGE_GEOMETRY_BIT, | ||
| 38 | VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 39 | }; | ||
| 40 | builder.Add(infos[index], stages.at(index)); | ||
| 41 | } | ||
| 42 | return builder.Create(device.GetLogical()); | ||
| 43 | } | ||
| 44 | |||
| 45 | template <class StencilFace> | ||
| 46 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { | ||
| 47 | return { | ||
| 48 | .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), | ||
| 49 | .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), | ||
| 50 | .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), | ||
| 51 | .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), | ||
| 52 | .compareMask = 0, | ||
| 53 | .writeMask = 0, | ||
| 54 | .reference = 0, | ||
| 55 | }; | ||
| 56 | } | ||
| 57 | |||
| 58 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { | ||
| 59 | static constexpr std::array unsupported_topologies{ | ||
| 60 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, | ||
| 61 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, | ||
| 62 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | ||
| 63 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, | ||
| 64 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, | ||
| 65 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, | ||
| 66 | // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, | ||
| 67 | }; | ||
| 68 | return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); | ||
| 69 | } | ||
| 70 | |||
| 71 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | ||
| 72 | union Swizzle { | ||
| 73 | u32 raw; | ||
| 74 | BitField<0, 3, Maxwell::ViewportSwizzle> x; | ||
| 75 | BitField<4, 3, Maxwell::ViewportSwizzle> y; | ||
| 76 | BitField<8, 3, Maxwell::ViewportSwizzle> z; | ||
| 77 | BitField<12, 3, Maxwell::ViewportSwizzle> w; | ||
| 78 | }; | ||
| 79 | const Swizzle unpacked{swizzle}; | ||
| 80 | return VkViewportSwizzleNV{ | ||
| 81 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), | ||
| 82 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), | ||
| 83 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), | ||
| 84 | .w = MaxwellToVK::ViewportSwizzle(unpacked.w), | ||
| 85 | }; | ||
| 86 | } | ||
| 87 | |||
| 88 | PixelFormat DecodeFormat(u8 encoded_format) { | ||
| 89 | const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)}; | ||
| 90 | if (format == Tegra::RenderTargetFormat::NONE) { | ||
| 91 | return PixelFormat::Invalid; | ||
| 92 | } | ||
| 93 | return PixelFormatFromRenderTargetFormat(format); | ||
| 94 | } | ||
| 95 | |||
| 96 | RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { | ||
| 97 | RenderPassKey key; | ||
| 98 | std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); | ||
| 99 | if (state.depth_enabled != 0) { | ||
| 100 | const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())}; | ||
| 101 | key.depth_format = PixelFormatFromDepthFormat(depth_format); | ||
| 102 | } else { | ||
| 103 | key.depth_format = PixelFormat::Invalid; | ||
| 104 | } | ||
| 105 | key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); | ||
| 106 | return key; | ||
| 107 | } | ||
| 108 | } // Anonymous namespace | ||
| 109 | |||
| 110 | GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 111 | Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, | ||
| 112 | BufferCache& buffer_cache_, TextureCache& texture_cache_, | ||
| 113 | const Device& device, VKDescriptorPool& descriptor_pool, | ||
| 114 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 115 | RenderPassCache& render_pass_cache, | ||
| 116 | const FixedPipelineState& state, | ||
| 117 | std::array<vk::ShaderModule, NUM_STAGES> stages, | ||
| 118 | const std::array<const Shader::Info*, NUM_STAGES>& infos) | ||
| 119 | : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, | ||
| 120 | buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, | ||
| 121 | update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { | ||
| 122 | std::ranges::transform(infos, stage_infos.begin(), | ||
| 123 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | ||
| 124 | |||
| 125 | DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; | ||
| 126 | descriptor_set_layout = std::move(tuple.descriptor_set_layout); | ||
| 127 | pipeline_layout = std::move(tuple.pipeline_layout); | ||
| 128 | descriptor_update_template = std::move(tuple.descriptor_update_template); | ||
| 129 | descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); | ||
| 130 | |||
| 131 | const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; | ||
| 132 | MakePipeline(device, state, render_pass); | ||
| 133 | } | ||
| 134 | |||
| 135 | void GraphicsPipeline::Configure(bool is_indexed) { | ||
| 136 | static constexpr size_t max_images_elements = 64; | ||
| 137 | std::array<ImageId, max_images_elements> image_view_ids; | ||
| 138 | static_vector<u32, max_images_elements> image_view_indices; | ||
| 139 | static_vector<VkSampler, max_images_elements> samplers; | ||
| 140 | |||
| 141 | texture_cache->SynchronizeGraphicsDescriptors(); | ||
| 142 | texture_cache->UpdateRenderTargets(false); | ||
| 143 | |||
| 144 | const auto& regs{maxwell3d->regs}; | ||
| 145 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 146 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 147 | const Shader::Info& info{stage_infos[stage]}; | ||
| 148 | buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); | ||
| 149 | buffer_cache->UnbindGraphicsStorageBuffers(stage); | ||
| 150 | size_t index{}; | ||
| 151 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 152 | ASSERT(desc.count == 1); | ||
| 153 | buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, | ||
| 154 | true); | ||
| 155 | ++index; | ||
| 156 | } | ||
| 157 | const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; | ||
| 158 | for (const auto& desc : info.texture_descriptors) { | ||
| 159 | const u32 cbuf_index{desc.cbuf_index}; | ||
| 160 | const u32 cbuf_offset{desc.cbuf_offset}; | ||
| 161 | ASSERT(cbufs[cbuf_index].enabled); | ||
| 162 | const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; | ||
| 163 | const u32 raw_handle{gpu_memory->Read<u32>(addr)}; | ||
| 164 | |||
| 165 | const TextureHandle handle(raw_handle, via_header_index); | ||
| 166 | image_view_indices.push_back(handle.image); | ||
| 167 | |||
| 168 | Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; | ||
| 169 | samplers.push_back(sampler->Handle()); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 173 | buffer_cache->UpdateGraphicsBuffers(is_indexed); | ||
| 174 | texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 175 | |||
| 176 | buffer_cache->BindHostGeometryBuffers(is_indexed); | ||
| 177 | |||
| 178 | size_t index{}; | ||
| 179 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 180 | buffer_cache->BindHostStageBuffers(stage); | ||
| 181 | PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), | ||
| 182 | *texture_cache, *update_descriptor_queue, index); | ||
| 183 | } | ||
| 184 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||
| 185 | update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); | ||
| 186 | |||
| 187 | scheduler->BindGraphicsPipeline(*pipeline); | ||
| 188 | scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { | ||
| 189 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | ||
| 190 | nullptr); | ||
| 191 | }); | ||
| 192 | } | ||
| 193 | |||
| 194 | void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, | ||
| 195 | VkRenderPass render_pass) { | ||
| 196 | FixedPipelineState::DynamicState dynamic{}; | ||
| 197 | if (!device.IsExtExtendedDynamicStateSupported()) { | ||
| 198 | dynamic = state.dynamic_state; | ||
| 199 | } | ||
| 200 | static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; | ||
| 201 | static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; | ||
| 202 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 203 | const bool instanced = state.binding_divisors[index] != 0; | ||
| 204 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 205 | vertex_bindings.push_back({ | ||
| 206 | .binding = static_cast<u32>(index), | ||
| 207 | .stride = dynamic.vertex_strides[index], | ||
| 208 | .inputRate = rate, | ||
| 209 | }); | ||
| 210 | if (instanced) { | ||
| 211 | vertex_binding_divisors.push_back({ | ||
| 212 | .binding = static_cast<u32>(index), | ||
| 213 | .divisor = state.binding_divisors[index], | ||
| 214 | }); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; | ||
| 218 | const auto& input_attributes = stage_infos[0].loads_generics; | ||
| 219 | for (size_t index = 0; index < state.attributes.size(); ++index) { | ||
| 220 | const auto& attribute = state.attributes[index]; | ||
| 221 | if (!attribute.enabled || !input_attributes[index]) { | ||
| 222 | continue; | ||
| 223 | } | ||
| 224 | vertex_attributes.push_back({ | ||
| 225 | .location = static_cast<u32>(index), | ||
| 226 | .binding = attribute.buffer, | ||
| 227 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 228 | .offset = attribute.offset, | ||
| 229 | }); | ||
| 230 | } | ||
| 231 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ | ||
| 232 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 233 | .pNext = nullptr, | ||
| 234 | .flags = 0, | ||
| 235 | .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), | ||
| 236 | .pVertexBindingDescriptions = vertex_bindings.data(), | ||
| 237 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), | ||
| 238 | .pVertexAttributeDescriptions = vertex_attributes.data(), | ||
| 239 | }; | ||
| 240 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ | ||
| 241 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, | ||
| 242 | .pNext = nullptr, | ||
| 243 | .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), | ||
| 244 | .pVertexBindingDivisors = vertex_binding_divisors.data(), | ||
| 245 | }; | ||
| 246 | if (!vertex_binding_divisors.empty()) { | ||
| 247 | vertex_input_ci.pNext = &input_divisor_ci; | ||
| 248 | } | ||
| 249 | const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); | ||
| 250 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ | ||
| 251 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 252 | .pNext = nullptr, | ||
| 253 | .flags = 0, | ||
| 254 | .topology = input_assembly_topology, | ||
| 255 | .primitiveRestartEnable = state.primitive_restart_enable != 0 && | ||
| 256 | SupportsPrimitiveRestart(input_assembly_topology), | ||
| 257 | }; | ||
| 258 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ | ||
| 259 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, | ||
| 260 | .pNext = nullptr, | ||
| 261 | .flags = 0, | ||
| 262 | .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, | ||
| 263 | }; | ||
| 264 | VkPipelineViewportStateCreateInfo viewport_ci{ | ||
| 265 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 266 | .pNext = nullptr, | ||
| 267 | .flags = 0, | ||
| 268 | .viewportCount = Maxwell::NumViewports, | ||
| 269 | .pViewports = nullptr, | ||
| 270 | .scissorCount = Maxwell::NumViewports, | ||
| 271 | .pScissors = nullptr, | ||
| 272 | }; | ||
| 273 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | ||
| 274 | std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); | ||
| 275 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | ||
| 276 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | ||
| 277 | .pNext = nullptr, | ||
| 278 | .flags = 0, | ||
| 279 | .viewportCount = Maxwell::NumViewports, | ||
| 280 | .pViewportSwizzles = swizzles.data(), | ||
| 281 | }; | ||
| 282 | if (device.IsNvViewportSwizzleSupported()) { | ||
| 283 | viewport_ci.pNext = &swizzle_ci; | ||
| 284 | } | ||
| 285 | |||
| 286 | const VkPipelineRasterizationStateCreateInfo rasterization_ci{ | ||
| 287 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 288 | .pNext = nullptr, | ||
| 289 | .flags = 0, | ||
| 290 | .depthClampEnable = | ||
| 291 | static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), | ||
| 292 | .rasterizerDiscardEnable = | ||
| 293 | static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), | ||
| 294 | .polygonMode = VK_POLYGON_MODE_FILL, | ||
| 295 | .cullMode = static_cast<VkCullModeFlags>( | ||
| 296 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), | ||
| 297 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), | ||
| 298 | .depthBiasEnable = state.depth_bias_enable, | ||
| 299 | .depthBiasConstantFactor = 0.0f, | ||
| 300 | .depthBiasClamp = 0.0f, | ||
| 301 | .depthBiasSlopeFactor = 0.0f, | ||
| 302 | .lineWidth = 1.0f, | ||
| 303 | }; | ||
| 304 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ | ||
| 305 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 306 | .pNext = nullptr, | ||
| 307 | .flags = 0, | ||
| 308 | .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), | ||
| 309 | .sampleShadingEnable = VK_FALSE, | ||
| 310 | .minSampleShading = 0.0f, | ||
| 311 | .pSampleMask = nullptr, | ||
| 312 | .alphaToCoverageEnable = VK_FALSE, | ||
| 313 | .alphaToOneEnable = VK_FALSE, | ||
| 314 | }; | ||
| 315 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ | ||
| 316 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 317 | .pNext = nullptr, | ||
| 318 | .flags = 0, | ||
| 319 | .depthTestEnable = dynamic.depth_test_enable, | ||
| 320 | .depthWriteEnable = dynamic.depth_write_enable, | ||
| 321 | .depthCompareOp = dynamic.depth_test_enable | ||
| 322 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) | ||
| 323 | : VK_COMPARE_OP_ALWAYS, | ||
| 324 | .depthBoundsTestEnable = dynamic.depth_bounds_enable, | ||
| 325 | .stencilTestEnable = dynamic.stencil_enable, | ||
| 326 | .front = GetStencilFaceState(dynamic.front), | ||
| 327 | .back = GetStencilFaceState(dynamic.back), | ||
| 328 | .minDepthBounds = 0.0f, | ||
| 329 | .maxDepthBounds = 0.0f, | ||
| 330 | }; | ||
| 331 | static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | ||
| 332 | for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 333 | static constexpr std::array mask_table{ | ||
| 334 | VK_COLOR_COMPONENT_R_BIT, | ||
| 335 | VK_COLOR_COMPONENT_G_BIT, | ||
| 336 | VK_COLOR_COMPONENT_B_BIT, | ||
| 337 | VK_COLOR_COMPONENT_A_BIT, | ||
| 338 | }; | ||
| 339 | const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])}; | ||
| 340 | if (format == Tegra::RenderTargetFormat::NONE) { | ||
| 341 | continue; | ||
| 342 | } | ||
| 343 | const auto& blend{state.attachments[index]}; | ||
| 344 | const std::array mask{blend.Mask()}; | ||
| 345 | VkColorComponentFlags write_mask{}; | ||
| 346 | for (size_t i = 0; i < mask_table.size(); ++i) { | ||
| 347 | write_mask |= mask[i] ? mask_table[i] : 0; | ||
| 348 | } | ||
| 349 | cb_attachments.push_back({ | ||
| 350 | .blendEnable = blend.enable != 0, | ||
| 351 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), | ||
| 352 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), | ||
| 353 | .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), | ||
| 354 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), | ||
| 355 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), | ||
| 356 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), | ||
| 357 | .colorWriteMask = write_mask, | ||
| 358 | }); | ||
| 359 | } | ||
| 360 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ | ||
| 361 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 362 | .pNext = nullptr, | ||
| 363 | .flags = 0, | ||
| 364 | .logicOpEnable = VK_FALSE, | ||
| 365 | .logicOp = VK_LOGIC_OP_COPY, | ||
| 366 | .attachmentCount = static_cast<u32>(cb_attachments.size()), | ||
| 367 | .pAttachments = cb_attachments.data(), | ||
| 368 | .blendConstants = {}, | ||
| 369 | }; | ||
| 370 | static_vector<VkDynamicState, 17> dynamic_states{ | ||
| 371 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, | ||
| 372 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, | ||
| 373 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, | ||
| 374 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, | ||
| 375 | }; | ||
| 376 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 377 | static constexpr std::array extended{ | ||
| 378 | VK_DYNAMIC_STATE_CULL_MODE_EXT, | ||
| 379 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, | ||
| 380 | VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, | ||
| 381 | VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, | ||
| 382 | VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, | ||
| 383 | VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, | ||
| 384 | VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, | ||
| 385 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, | ||
| 386 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, | ||
| 387 | }; | ||
| 388 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); | ||
| 389 | } | ||
| 390 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ | ||
| 391 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 392 | .pNext = nullptr, | ||
| 393 | .flags = 0, | ||
| 394 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), | ||
| 395 | .pDynamicStates = dynamic_states.data(), | ||
| 396 | }; | ||
| 397 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 398 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||
| 399 | .pNext = nullptr, | ||
| 400 | .requiredSubgroupSize = GuestWarpSize, | ||
| 401 | }; | ||
| 402 | static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages; | ||
| 403 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 404 | if (!spv_modules[stage]) { | ||
| 405 | continue; | ||
| 406 | } | ||
| 407 | [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ | ||
| 408 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 409 | .pNext = nullptr, | ||
| 410 | .flags = 0, | ||
| 411 | .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)), | ||
| 412 | .module = *spv_modules[stage], | ||
| 413 | .pName = "main", | ||
| 414 | .pSpecializationInfo = nullptr, | ||
| 415 | }); | ||
| 416 | /* | ||
| 417 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { | ||
| 418 | stage_ci.pNext = &subgroup_size_ci; | ||
| 419 | } | ||
| 420 | */ | ||
| 421 | } | ||
| 422 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | ||
| 423 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 424 | .pNext = nullptr, | ||
| 425 | .flags = 0, | ||
| 426 | .stageCount = static_cast<u32>(shader_stages.size()), | ||
| 427 | .pStages = shader_stages.data(), | ||
| 428 | .pVertexInputState = &vertex_input_ci, | ||
| 429 | .pInputAssemblyState = &input_assembly_ci, | ||
| 430 | .pTessellationState = &tessellation_ci, | ||
| 431 | .pViewportState = &viewport_ci, | ||
| 432 | .pRasterizationState = &rasterization_ci, | ||
| 433 | .pMultisampleState = &multisample_ci, | ||
| 434 | .pDepthStencilState = &depth_stencil_ci, | ||
| 435 | .pColorBlendState = &color_blend_ci, | ||
| 436 | .pDynamicState = &dynamic_state_ci, | ||
| 437 | .layout = *pipeline_layout, | ||
| 438 | .renderPass = render_pass, | ||
| 439 | .subpass = 0, | ||
| 440 | .basePipelineHandle = nullptr, | ||
| 441 | .basePipelineIndex = 0, | ||
| 442 | }); | ||
| 443 | } | ||
| 444 | |||
| 445 | } // namespace Vulkan | ||
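A note on UnpackViewportSwizzle above: the hardware packs four 3-bit swizzle selectors on 4-bit boundaries of a 16-bit word, which is exactly what the BitField positions 0/4/8/12 encode. A minimal sketch of that layout with plain shifts and masks in place of BitField (UnpackSwizzleSelectors is an illustrative name, not part of the patch):

    #include <array>
    #include <cstdint>

    // Selectors live in bits [0,3), [4,7), [8,11) and [12,15); the top bit
    // of each nibble is unused.
    std::array<uint32_t, 4> UnpackSwizzleSelectors(uint16_t packed) {
        return {
            static_cast<uint32_t>(packed & 7u),         // x
            static_cast<uint32_t>((packed >> 4) & 7u),  // y
            static_cast<uint32_t>((packed >> 8) & 7u),  // z
            static_cast<uint32_t>((packed >> 12) & 7u), // w
        };
    }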
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..ba1d34a83 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | |||
| 9 | #include "shader_recompiler/shader_info.h" | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | |||
| 19 | class Device; | ||
| 20 | class RenderPassCache; | ||
| 21 | class VKScheduler; | ||
| 22 | class VKUpdateDescriptorQueue; | ||
| 23 | |||
| 24 | class GraphicsPipeline { | ||
| 25 | static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||
| 26 | |||
| 27 | public: | ||
| 28 | explicit GraphicsPipeline() = default; | ||
| 29 | explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 30 | Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, | ||
| 31 | BufferCache& buffer_cache, TextureCache& texture_cache, | ||
| 32 | const Device& device, VKDescriptorPool& descriptor_pool, | ||
| 33 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 34 | RenderPassCache& render_pass_cache, const FixedPipelineState& state, | ||
| 35 | std::array<vk::ShaderModule, NUM_STAGES> stages, | ||
| 36 | const std::array<const Shader::Info*, NUM_STAGES>& infos); | ||
| 37 | |||
| 38 | void Configure(bool is_indexed); | ||
| 39 | |||
| 40 | GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; | ||
| 41 | GraphicsPipeline(GraphicsPipeline&&) noexcept = default; | ||
| 42 | |||
| 43 | GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; | ||
| 44 | GraphicsPipeline(const GraphicsPipeline&) = delete; | ||
| 45 | |||
| 46 | private: | ||
| 47 | void MakePipeline(const Device& device, const FixedPipelineState& state, | ||
| 48 | VkRenderPass render_pass); | ||
| 49 | |||
| 50 | Tegra::Engines::Maxwell3D* maxwell3d{}; | ||
| 51 | Tegra::MemoryManager* gpu_memory{}; | ||
| 52 | TextureCache* texture_cache{}; | ||
| 53 | BufferCache* buffer_cache{}; | ||
| 54 | VKScheduler* scheduler{}; | ||
| 55 | VKUpdateDescriptorQueue* update_descriptor_queue{}; | ||
| 56 | |||
| 57 | std::array<vk::ShaderModule, NUM_STAGES> spv_modules; | ||
| 58 | std::array<Shader::Info, NUM_STAGES> stage_infos; | ||
| 59 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 60 | DescriptorAllocator descriptor_allocator; | ||
| 61 | vk::PipelineLayout pipeline_layout; | ||
| 62 | vk::DescriptorUpdateTemplateKHR descriptor_update_template; | ||
| 63 | vk::Pipeline pipeline; | ||
| 64 | }; | ||
| 65 | |||
| 66 | } // namespace Vulkan | ||
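GraphicsPipeline is deliberately move-only: the pipeline cache default-constructs a map slot with try_emplace and then move-assigns the freshly built pipeline into it, so the owning vk:: handles are never duplicated. A condensed sketch of that ownership pattern, assuming only a default-constructible, move-assignable pipeline type (GetOrCreate is an illustrative helper, not part of the patch):

    #include <unordered_map>

    template <class Key, class Pipeline, class Hash, class Factory>
    Pipeline* GetOrCreate(std::unordered_map<Key, Pipeline, Hash>& cache,
                          const Key& key, Factory&& make) {
        const auto [it, is_new] = cache.try_emplace(key); // default-constructs the slot
        if (is_new) {
            it->second = make(); // move-assign the real pipeline into place
        }
        return &it->second;
    }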
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b06288403..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null | |||
| @@ -1,36 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | class Pipeline { | ||
| 14 | public: | ||
| 15 | /// Add a reference count to the pipeline | ||
| 16 | void AddRef() noexcept { | ||
| 17 | ++ref_count; | ||
| 18 | } | ||
| 19 | |||
| 20 | [[nodiscard]] bool RemoveRef() noexcept { | ||
| 21 | --ref_count; | ||
| 22 | return ref_count == 0; | ||
| 23 | } | ||
| 24 | |||
| 25 | [[nodiscard]] u64 UsageTick() const noexcept { | ||
| 26 | return usage_tick; | ||
| 27 | } | ||
| 28 | |||
| 29 | protected: | ||
| 30 | u64 usage_tick{}; | ||
| 31 | |||
| 32 | private: | ||
| 33 | size_t ref_count{}; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903..c9da2080d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -12,8 +12,11 @@ | |||
| 12 | #include "common/microprofile.h" | 12 | #include "common/microprofile.h" |
| 13 | #include "core/core.h" | 13 | #include "core/core.h" |
| 14 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 15 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 15 | #include "shader_recompiler/environment.h" | 16 | #include "shader_recompiler/environment.h" |
| 16 | #include "shader_recompiler/recompiler.h" | 17 | #include "shader_recompiler/frontend/maxwell/control_flow.h" |
| 18 | #include "shader_recompiler/frontend/maxwell/program.h" | ||
| 19 | #include "shader_recompiler/program_header.h" | ||
| 17 | #include "video_core/engines/kepler_compute.h" | 20 | #include "video_core/engines/kepler_compute.h" |
| 18 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 19 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| @@ -34,18 +37,18 @@ | |||
| 34 | namespace Vulkan { | 37 | namespace Vulkan { |
| 35 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 38 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); |
| 36 | 39 | ||
| 37 | using Tegra::Engines::ShaderType; | ||
| 38 | |||
| 39 | namespace { | 40 | namespace { |
| 40 | class Environment final : public Shader::Environment { | 41 | using Shader::Backend::SPIRV::EmitSPIRV; |
| 42 | |||
| 43 | class GenericEnvironment : public Shader::Environment { | ||
| 41 | public: | 44 | public: |
| 42 | explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, | 45 | explicit GenericEnvironment() = default; |
| 43 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) | 46 | explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) |
| 44 | : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} | 47 | : gpu_memory{&gpu_memory_}, program_base{program_base_} {} |
| 45 | 48 | ||
| 46 | ~Environment() override = default; | 49 | ~GenericEnvironment() override = default; |
| 47 | 50 | ||
| 48 | [[nodiscard]] std::optional<u128> Analyze(u32 start_address) { | 51 | std::optional<u128> Analyze(u32 start_address) { |
| 49 | const std::optional<u64> size{TryFindSize(start_address)}; | 52 | const std::optional<u64> size{TryFindSize(start_address)}; |
| 50 | if (!size) { | 53 | if (!size) { |
| 51 | return std::nullopt; | 54 | return std::nullopt; |
| @@ -55,52 +58,47 @@ public: | |||
| 55 | return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size()); | 58 | return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size()); |
| 56 | } | 59 | } |
| 57 | 60 | ||
| 58 | [[nodiscard]] size_t ShaderSize() const noexcept { | 61 | [[nodiscard]] size_t CachedSize() const noexcept { |
| 62 | return cached_highest - cached_lowest + INST_SIZE; | ||
| 63 | } | ||
| 64 | |||
| 65 | [[nodiscard]] size_t ReadSize() const noexcept { | ||
| 59 | return read_highest - read_lowest + INST_SIZE; | 66 | return read_highest - read_lowest + INST_SIZE; |
| 60 | } | 67 | } |
| 61 | 68 | ||
| 62 | [[nodiscard]] u128 ComputeHash() const { | 69 | [[nodiscard]] u128 CalculateHash() const { |
| 63 | const size_t size{ShaderSize()}; | 70 | const size_t size{ReadSize()}; |
| 64 | auto data = std::make_unique&lt;u64[]&gt;(size / INST_SIZE); | 71 | auto data = std::make_unique&lt;u64[]&gt;(size / INST_SIZE); |
| 65 | gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); | 72 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); |
| 66 | return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size); | 73 | return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size); |
| 67 | } | 74 | } |
| 68 | 75 | ||
| 69 | u64 ReadInstruction(u32 address) override { | 76 | u64 ReadInstruction(u32 address) final { |
| 70 | read_lowest = std::min(read_lowest, address); | 77 | read_lowest = std::min(read_lowest, address); |
| 71 | read_highest = std::max(read_highest, address); | 78 | read_highest = std::max(read_highest, address); |
| 72 | 79 | ||
| 73 | if (address >= cached_lowest && address < cached_highest) { | 80 | if (address >= cached_lowest && address < cached_highest) { |
| 74 | return code[address / INST_SIZE]; | 81 | return code[address / INST_SIZE]; |
| 75 | } | 82 | } |
| 76 | return gpu_memory.Read<u64>(program_base + address); | 83 | return gpu_memory->Read<u64>(program_base + address); |
| 77 | } | ||
| 78 | |||
| 79 | u32 TextureBoundBuffer() override { | ||
| 80 | return kepler_compute.regs.tex_cb_index; | ||
| 81 | } | ||
| 82 | |||
| 83 | std::array<u32, 3> WorkgroupSize() override { | ||
| 84 | const auto& qmd{kepler_compute.launch_description}; | ||
| 85 | return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 86 | } | 84 | } |
| 87 | 85 | ||
| 88 | private: | 86 | protected: |
| 89 | static constexpr size_t INST_SIZE = sizeof(u64); | 87 | static constexpr size_t INST_SIZE = sizeof(u64); |
| 90 | static constexpr size_t BLOCK_SIZE = 0x1000; | ||
| 91 | static constexpr size_t MAXIMUM_SIZE = 0x100000; | ||
| 92 | 88 | ||
| 93 | static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; | 89 | std::optional<u64> TryFindSize(GPUVAddr guest_addr) { |
| 94 | static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; | 90 | constexpr size_t BLOCK_SIZE = 0x1000; |
| 91 | constexpr size_t MAXIMUM_SIZE = 0x100000; | ||
| 92 | |||
| 93 | constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; | ||
| 94 | constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; | ||
| 95 | 95 | ||
| 96 | std::optional<u64> TryFindSize(u32 start_address) { | ||
| 97 | GPUVAddr guest_addr = program_base + start_address; | ||
| 98 | size_t offset = 0; | 96 | size_t offset = 0; |
| 99 | size_t size = BLOCK_SIZE; | 97 | size_t size = BLOCK_SIZE; |
| 100 | while (size <= MAXIMUM_SIZE) { | 98 | while (size <= MAXIMUM_SIZE) { |
| 101 | code.resize(size / INST_SIZE); | 99 | code.resize(size / INST_SIZE); |
| 102 | u64* const data = code.data() + offset / INST_SIZE; | 100 | u64* const data = code.data() + offset / INST_SIZE; |
| 103 | gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); | 101 | gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); |
| 104 | for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { | 102 | for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { |
| 105 | const u64 inst = data[i / INST_SIZE]; | 103 | const u64 inst = data[i / INST_SIZE]; |
| 106 | if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { | 104 | if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { |
| @@ -114,17 +112,87 @@ private: | |||
| 114 | return std::nullopt; | 112 | return std::nullopt; |
| 115 | } | 113 | } |
| 116 | 114 | ||
| 117 | Tegra::Engines::KeplerCompute& kepler_compute; | 115 | Tegra::MemoryManager* gpu_memory{}; |
| 118 | Tegra::MemoryManager& gpu_memory; | 116 | GPUVAddr program_base{}; |
| 119 | GPUVAddr program_base; | 117 | |
| 118 | std::vector<u64> code; | ||
| 120 | 119 | ||
| 121 | u32 read_lowest = 0; | 120 | u32 read_lowest = std::numeric_limits<u32>::max(); |
| 122 | u32 read_highest = 0; | 121 | u32 read_highest = 0; |
| 123 | 122 | ||
| 124 | std::vector<u64> code; | ||
| 125 | u32 cached_lowest = std::numeric_limits<u32>::max(); | 123 | u32 cached_lowest = std::numeric_limits<u32>::max(); |
| 126 | u32 cached_highest = 0; | 124 | u32 cached_highest = 0; |
| 127 | }; | 125 | }; |
| 126 | |||
| 127 | class GraphicsEnvironment final : public GenericEnvironment { | ||
| 128 | public: | ||
| 129 | explicit GraphicsEnvironment() = default; | ||
| 130 | explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 131 | Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, | ||
| 132 | GPUVAddr program_base_, u32 start_offset) | ||
| 133 | : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { | ||
| 134 | gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); | ||
| 135 | switch (program) { | ||
| 136 | case Maxwell::ShaderProgram::VertexA: | ||
| 137 | stage = Shader::Stage::VertexA; | ||
| 138 | break; | ||
| 139 | case Maxwell::ShaderProgram::VertexB: | ||
| 140 | stage = Shader::Stage::VertexB; | ||
| 141 | break; | ||
| 142 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 143 | stage = Shader::Stage::TessellationControl; | ||
| 144 | break; | ||
| 145 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 146 | stage = Shader::Stage::TessellationEval; | ||
| 147 | break; | ||
| 148 | case Maxwell::ShaderProgram::Geometry: | ||
| 149 | stage = Shader::Stage::Geometry; | ||
| 150 | break; | ||
| 151 | case Maxwell::ShaderProgram::Fragment: | ||
| 152 | stage = Shader::Stage::Fragment; | ||
| 153 | break; | ||
| 154 | default: | ||
| 155 | UNREACHABLE_MSG("Invalid program={}", program); | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | ~GraphicsEnvironment() override = default; | ||
| 160 | |||
| 161 | u32 TextureBoundBuffer() override { | ||
| 162 | return maxwell3d->regs.tex_cb_index; | ||
| 163 | } | ||
| 164 | |||
| 165 | std::array<u32, 3> WorkgroupSize() override { | ||
| 166 | throw Shader::LogicError("Requesting workgroup size in a graphics stage"); | ||
| 167 | } | ||
| 168 | |||
| 169 | private: | ||
| 170 | Tegra::Engines::Maxwell3D* maxwell3d{}; | ||
| 171 | }; | ||
| 172 | |||
| 173 | class ComputeEnvironment final : public GenericEnvironment { | ||
| 174 | public: | ||
| 175 | explicit ComputeEnvironment() = default; | ||
| 176 | explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 177 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) | ||
| 178 | : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { | ||
| 179 | stage = Shader::Stage::Compute; | ||
| 180 | } | ||
| 181 | |||
| 182 | ~ComputeEnvironment() override = default; | ||
| 183 | |||
| 184 | u32 TextureBoundBuffer() override { | ||
| 185 | return kepler_compute->regs.tex_cb_index; | ||
| 186 | } | ||
| 187 | |||
| 188 | std::array<u32, 3> WorkgroupSize() override { | ||
| 189 | const auto& qmd{kepler_compute->launch_description}; | ||
| 190 | return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 191 | } | ||
| 192 | |||
| 193 | private: | ||
| 194 | Tegra::Engines::KeplerCompute* kepler_compute{}; | ||
| 195 | }; | ||
| 128 | } // Anonymous namespace | 196 | } // Anonymous namespace |
| 129 | 197 | ||
| 130 | size_t ComputePipelineCacheKey::Hash() const noexcept { | 198 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con | |||
| 136 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 204 | return std::memcmp(&rhs, this, sizeof *this) == 0; |
| 137 | } | 205 | } |
| 138 | 206 | ||
| 207 | size_t GraphicsPipelineCacheKey::Hash() const noexcept { | ||
| 208 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); | ||
| 209 | return static_cast<size_t>(hash); | ||
| 210 | } | ||
| 211 | |||
| 212 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 213 | return std::memcmp(&rhs, this, Size()) == 0; | ||
| 214 | } | ||
| 215 | |||
| 139 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 216 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, |
| 140 | Tegra::Engines::Maxwell3D& maxwell3d_, | 217 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 141 | Tegra::Engines::KeplerCompute& kepler_compute_, | 218 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 142 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 219 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 143 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 220 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, |
| 144 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 221 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 222 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | ||
| 223 | TextureCache& texture_cache_) | ||
| 145 | : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | 224 | : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, |
| 146 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | 225 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, |
| 147 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ | 226 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, |
| 148 | update_descriptor_queue_} {} | 227 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, |
| 228 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { | ||
| 229 | const auto& float_control{device.FloatControlProperties()}; | ||
| 230 | profile = Shader::Profile{ | ||
| 231 | .unified_descriptor_binding = true, | ||
| 232 | .support_float_controls = true, | ||
| 233 | .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == | ||
| 234 | VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 235 | .support_separate_rounding_mode = | ||
| 236 | float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 237 | .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, | ||
| 238 | .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, | ||
| 239 | .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, | ||
| 240 | .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, | ||
| 241 | .support_fp16_signed_zero_nan_preserve = | ||
| 242 | float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, | ||
| 243 | .support_fp32_signed_zero_nan_preserve = | ||
| 244 | float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, | ||
| 245 | .has_broken_spirv_clamp = true, // TODO: is_intel | ||
| 246 | }; | ||
| 247 | } | ||
| 149 | 248 | ||
| 150 | PipelineCache::~PipelineCache() = default; | 249 | PipelineCache::~PipelineCache() = default; |
| 151 | 250 | ||
| 251 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { | ||
| 252 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||
| 253 | |||
| 254 | if (!RefreshStages()) { | ||
| 255 | return nullptr; | ||
| 256 | } | ||
| 257 | graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); | ||
| 258 | |||
| 259 | const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; | ||
| 260 | auto& pipeline{pair->second}; | ||
| 261 | if (!is_new) { | ||
| 262 | return &pipeline; | ||
| 263 | } | ||
| 264 | pipeline = CreateGraphicsPipeline(); | ||
| 265 | return &pipeline; | ||
| 266 | } | ||
| 267 | |||
| 152 | ComputePipeline* PipelineCache::CurrentComputePipeline() { | 268 | ComputePipeline* PipelineCache::CurrentComputePipeline() { |
| 153 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 269 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 154 | 270 | ||
| @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { | |||
| 170 | return &pipeline; | 286 | return &pipeline; |
| 171 | } | 287 | } |
| 172 | pipeline = CreateComputePipeline(shader); | 288 | pipeline = CreateComputePipeline(shader); |
| 173 | shader->compute_users.push_back(key); | ||
| 174 | return &pipeline; | 289 | return &pipeline; |
| 175 | } | 290 | } |
| 176 | 291 | ||
| 292 | bool PipelineCache::RefreshStages() { | ||
| 293 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||
| 294 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 295 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 296 | graphics_key.unique_hashes[index] = u128{}; | ||
| 297 | continue; | ||
| 298 | } | ||
| 299 | const auto& shader_config{maxwell3d.regs.shader_config[index]}; | ||
| 300 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 301 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; | ||
| 302 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 303 | if (!cpu_shader_addr) { | ||
| 304 | LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); | ||
| 305 | return false; | ||
| 306 | } | ||
| 307 | const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; | ||
| 308 | if (!shader_info) { | ||
| 309 | const u32 offset{shader_config.offset}; | ||
| 310 | shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); | ||
| 311 | } | ||
| 312 | graphics_key.unique_hashes[index] = shader_info->unique_hash; | ||
| 313 | } | ||
| 314 | return true; | ||
| 315 | } | ||
| 316 | |||
| 317 | const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, | ||
| 318 | u32 start_address, VAddr cpu_addr) { | ||
| 319 | GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 320 | auto info = std::make_unique<ShaderInfo>(); | ||
| 321 | if (const std::optional<u128> cached_hash{env.Analyze(start_address)}) { | ||
| 322 | info->unique_hash = *cached_hash; | ||
| 323 | info->size_bytes = env.CachedSize(); | ||
| 324 | } else { | ||
| 325 | // Slow path, rarely hit by commercial games | ||
| 326 | // Build a control flow graph to get the real shader size | ||
| 327 | flow_block_pool.ReleaseContents(); | ||
| 328 | Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; | ||
| 329 | info->unique_hash = env.CalculateHash(); | ||
| 330 | info->size_bytes = env.ReadSize(); | ||
| 331 | } | ||
| 332 | const size_t size_bytes{info->size_bytes}; | ||
| 333 | const ShaderInfo* const result{info.get()}; | ||
| 334 | Register(std::move(info), cpu_addr, size_bytes); | ||
| 335 | return result; | ||
| 336 | } | ||
| 337 | |||
| 338 | GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { | ||
| 339 | flow_block_pool.ReleaseContents(); | ||
| 340 | inst_pool.ReleaseContents(); | ||
| 341 | block_pool.ReleaseContents(); | ||
| 342 | |||
| 343 | std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs; | ||
| 344 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||
| 345 | |||
| 346 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||
| 347 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 348 | if (graphics_key.unique_hashes[index] == u128{}) { | ||
| 349 | continue; | ||
| 350 | } | ||
| 351 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 352 | GraphicsEnvironment& env{envs[index]}; | ||
| 353 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | ||
| 354 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 355 | |||
| 356 | const u32 cfg_offset{start_address + static_cast&lt;u32&gt;(sizeof(Shader::ProgramHeader))}; | ||
| 357 | Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, cfg_offset}; | ||
| 358 | programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); | ||
| 359 | } | ||
| 360 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | ||
| 361 | std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; | ||
| 362 | |||
| 363 | u32 binding{0}; | ||
| 364 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 365 | if (graphics_key.unique_hashes[index] == u128{}) { | ||
| 366 | continue; | ||
| 367 | } | ||
| 368 | UNIMPLEMENTED_IF(index == 0); | ||
| 369 | |||
| 370 | GraphicsEnvironment& env{envs[index]}; | ||
| 371 | Shader::IR::Program& program{programs[index]}; | ||
| 372 | |||
| 373 | const size_t stage_index{index - 1}; | ||
| 374 | infos[stage_index] = &program.info; | ||
| 375 | std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; | ||
| 376 | |||
| 377 | // FILE* file = fopen("D:\\shader.spv", "wb"); | ||
| 378 | // fwrite(code.data(), 4, code.size(), file); | ||
| 379 | // fclose(file); | ||
| 380 | // std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); | ||
| 381 | |||
| 382 | modules[stage_index] = BuildShader(device, code); | ||
| 383 | } | ||
| 384 | return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, | ||
| 385 | descriptor_pool, update_descriptor_queue, render_pass_cache, | ||
| 386 | graphics_key.state, std::move(modules), infos); | ||
| 387 | } | ||
| 388 | |||
| 177 | ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { | 389 | ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { |
| 178 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | 390 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; |
| 179 | const auto& qmd{kepler_compute.launch_description}; | 391 | const auto& qmd{kepler_compute.launch_description}; |
| 180 | Environment env{kepler_compute, gpu_memory, program_base}; | 392 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; |
| 181 | if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { | 393 | if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { |
| 182 | // TODO: Load from cache | 394 | // TODO: Load from cache |
| 183 | } | 395 | } |
| 184 | const auto& float_control{device.FloatControlProperties()}; | 396 | flow_block_pool.ReleaseContents(); |
| 185 | const Shader::Profile profile{ | 397 | inst_pool.ReleaseContents(); |
| 186 | .unified_descriptor_binding = true, | 398 | block_pool.ReleaseContents(); |
| 187 | .support_float_controls = true, | 399 | |
| 188 | .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == | 400 | Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; |
| 189 | VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | 401 | Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; |
| 190 | .support_separate_rounding_mode = | 402 | u32 binding{0}; |
| 191 | float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | 403 | std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; |
| 192 | .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, | ||
| 193 | .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, | ||
| 194 | .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, | ||
| 195 | .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, | ||
| 196 | .support_fp16_signed_zero_nan_preserve = | ||
| 197 | float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, | ||
| 198 | .support_fp32_signed_zero_nan_preserve = | ||
| 199 | float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, | ||
| 200 | .has_broken_spirv_clamp = true, // TODO: is_intel | ||
| 201 | }; | ||
| 202 | const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; | ||
| 203 | /* | 404 | /* |
| 204 | FILE* file = fopen("D:\\shader.spv", "wb"); | 405 | FILE* file = fopen("D:\\shader.spv", "wb"); |
| 205 | fwrite(code.data(), 4, code.size(), file); | 406 | fwrite(code.data(), 4, code.size(), file); |
| 206 | fclose(file); | 407 | fclose(file); |
| 207 | std::system("spirv-dis D:\\shader.spv"); | 408 | std::system("spirv-dis D:\\shader.spv"); |
| 208 | */ | 409 | */ |
| 209 | shader_info->unique_hash = env.ComputeHash(); | 410 | shader_info->unique_hash = env.CalculateHash(); |
| 210 | shader_info->size_bytes = env.ShaderSize(); | 411 | shader_info->size_bytes = env.ReadSize(); |
| 211 | return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, | 412 | return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, |
| 212 | BuildShader(device, code)}; | 413 | BuildShader(device, code)}; |
| 213 | } | 414 | } |
| 214 | 415 | ||
| @@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_ | |||
| 216 | ShaderInfo shader; | 417 | ShaderInfo shader; |
| 217 | ComputePipeline pipeline{CreateComputePipeline(&shader)}; | 418 | ComputePipeline pipeline{CreateComputePipeline(&shader)}; |
| 218 | const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; | 419 | const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; |
| 219 | shader.compute_users.push_back(key); | ||
| 220 | pipeline.AddRef(); | ||
| 221 | |||
| 222 | const size_t size_bytes{shader.size_bytes}; | 420 | const size_t size_bytes{shader.size_bytes}; |
| 223 | Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes); | 421 | Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes); |
| 224 | return &compute_cache.emplace(key, std::move(pipeline)).first->second; | 422 | return &compute_cache.emplace(key, std::move(pipeline)).first->second; |
| @@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) | |||
| 233 | }; | 431 | }; |
| 234 | } | 432 | } |
| 235 | 433 | ||
| 236 | void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { | ||
| 237 | for (const ComputePipelineCacheKey& key : shader->compute_users) { | ||
| 238 | const auto it = compute_cache.find(key); | ||
| 239 | ASSERT(it != compute_cache.end()); | ||
| 240 | |||
| 241 | Pipeline& pipeline = it->second; | ||
| 242 | if (pipeline.RemoveRef()) { | ||
| 243 | // Wait for the pipeline to be free of GPU usage before destroying it | ||
| 244 | scheduler.Wait(pipeline.UsageTick()); | ||
| 245 | compute_cache.erase(it); | ||
| 246 | } | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | } // namespace Vulkan | 434 | } // namespace Vulkan |
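The size-detection trick in GenericEnvironment::TryFindSize is worth spelling out: Maxwell shaders conventionally end in a branch-to-self instruction, so guest memory is scanned in 0x1000-byte blocks until one of the two known encodings appears, giving up after 1 MiB. The sketch below mirrors the constants from the hunk; the block-advance bookkeeping and the exact return value are reconstructed from context (the hunk elides them), and read_block stands in for Tegra::MemoryManager::ReadBlock:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <optional>
    #include <vector>

    std::optional<uint64_t> FindShaderSize(
        const std::function<void(uint64_t, void*, std::size_t)>& read_block,
        uint64_t guest_addr) {
        constexpr std::size_t INST_SIZE = sizeof(uint64_t);
        constexpr std::size_t BLOCK_SIZE = 0x1000;
        constexpr std::size_t MAXIMUM_SIZE = 0x100000;
        constexpr uint64_t SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
        constexpr uint64_t SELF_BRANCH_B = 0xE2400FFFFF07000FULL;

        std::vector<uint64_t> code;
        std::size_t offset = 0;
        std::size_t size = BLOCK_SIZE;
        while (size <= MAXIMUM_SIZE) {
            code.resize(size / INST_SIZE);
            uint64_t* const data = code.data() + offset / INST_SIZE;
            read_block(guest_addr + offset, data, BLOCK_SIZE);
            for (std::size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
                const uint64_t inst = data[i / INST_SIZE];
                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
                    return offset + i + INST_SIZE; // include the terminating branch
                }
            }
            offset += BLOCK_SIZE;
            size += BLOCK_SIZE;
        }
        return std::nullopt;
    }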
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eb35abc27..60fb976df 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -12,11 +12,18 @@ | |||
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | #include <vector> | 13 | #include <vector> |
| 14 | 14 | ||
| 15 | #include <boost/functional/hash.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/microinstruction.h" | ||
| 18 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 19 | #include "shader_recompiler/object_pool.h" | ||
| 20 | #include "shader_recompiler/profile.h" | ||
| 18 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 23 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 20 | #include "video_core/shader_cache.h" | 27 | #include "video_core/shader_cache.h" |
| 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 22 | 29 | ||
| @@ -26,13 +33,6 @@ class System; | |||
| 26 | 33 | ||
| 27 | namespace Vulkan { | 34 | namespace Vulkan { |
| 28 | 35 | ||
| 29 | class Device; | ||
| 30 | class RasterizerVulkan; | ||
| 31 | class ComputePipeline; | ||
| 32 | class VKDescriptorPool; | ||
| 33 | class VKScheduler; | ||
| 34 | class VKUpdateDescriptorQueue; | ||
| 35 | |||
| 36 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 36 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 37 | 37 | ||
| 38 | struct ComputePipelineCacheKey { | 38 | struct ComputePipelineCacheKey { |
| @@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>) | |||
| 52 | static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>); | 52 | static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>); |
| 53 | static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); | 53 | static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); |
| 54 | 54 | ||
| 55 | struct GraphicsPipelineCacheKey { | ||
| 56 | std::array<u128, 6> unique_hashes; | ||
| 57 | FixedPipelineState state; | ||
| 58 | |||
| 59 | size_t Hash() const noexcept; | ||
| 60 | |||
| 61 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; | ||
| 62 | |||
| 63 | bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 64 | return !operator==(rhs); | ||
| 65 | } | ||
| 66 | |||
| 67 | size_t Size() const noexcept { | ||
| 68 | return sizeof(unique_hashes) + state.Size(); | ||
| 69 | } | ||
| 70 | }; | ||
| 71 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | ||
| 72 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | ||
| 73 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | ||
| 74 | |||
| 55 | } // namespace Vulkan | 75 | } // namespace Vulkan |
| 56 | 76 | ||
| 57 | namespace std { | 77 | namespace std { |
| @@ -63,14 +83,28 @@ struct hash<Vulkan::ComputePipelineCacheKey> { | |||
| 63 | } | 83 | } |
| 64 | }; | 84 | }; |
| 65 | 85 | ||
| 86 | template <> | ||
| 87 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 88 | size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 89 | return k.Hash(); | ||
| 90 | } | ||
| 91 | }; | ||
| 92 | |||
| 66 | } // namespace std | 93 | } // namespace std |
| 67 | 94 | ||
| 68 | namespace Vulkan { | 95 | namespace Vulkan { |
| 69 | 96 | ||
| 97 | class ComputePipeline; | ||
| 98 | class Device; | ||
| 99 | class RasterizerVulkan; | ||
| 100 | class RenderPassCache; | ||
| 101 | class VKDescriptorPool; | ||
| 102 | class VKScheduler; | ||
| 103 | class VKUpdateDescriptorQueue; | ||
| 104 | |||
| 70 | struct ShaderInfo { | 105 | struct ShaderInfo { |
| 71 | u128 unique_hash{}; | 106 | u128 unique_hash{}; |
| 72 | size_t size_bytes{}; | 107 | size_t size_bytes{}; |
| 73 | std::vector<ComputePipelineCacheKey> compute_users; | ||
| 74 | }; | 108 | }; |
| 75 | 109 | ||
| 76 | class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { | 110 | class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { |
| @@ -80,15 +114,23 @@ public: | |||
| 80 | Tegra::Engines::KeplerCompute& kepler_compute, | 114 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 81 | Tegra::MemoryManager& gpu_memory, const Device& device, | 115 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 82 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, | 116 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, |
| 83 | VKUpdateDescriptorQueue& update_descriptor_queue); | 117 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 118 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | ||
| 119 | TextureCache& texture_cache); | ||
| 84 | ~PipelineCache() override; | 120 | ~PipelineCache() override; |
| 85 | 121 | ||
| 86 | [[nodiscard]] ComputePipeline* CurrentComputePipeline(); | 122 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); |
| 87 | 123 | ||
| 88 | protected: | 124 | [[nodiscard]] ComputePipeline* CurrentComputePipeline(); |
| 89 | void OnShaderRemoval(ShaderInfo* shader) override; | ||
| 90 | 125 | ||
| 91 | private: | 126 | private: |
| 127 | bool RefreshStages(); | ||
| 128 | |||
| 129 | const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, | ||
| 130 | u32 start_address, VAddr cpu_addr); | ||
| 131 | |||
| 132 | GraphicsPipeline CreateGraphicsPipeline(); | ||
| 133 | |||
| 92 | ComputePipeline CreateComputePipeline(ShaderInfo* shader); | 134 | ComputePipeline CreateComputePipeline(ShaderInfo* shader); |
| 93 | 135 | ||
| 94 | ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); | 136 | ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); |
| @@ -104,8 +146,20 @@ private: | |||
| 104 | VKScheduler& scheduler; | 146 | VKScheduler& scheduler; |
| 105 | VKDescriptorPool& descriptor_pool; | 147 | VKDescriptorPool& descriptor_pool; |
| 106 | VKUpdateDescriptorQueue& update_descriptor_queue; | 148 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 149 | RenderPassCache& render_pass_cache; | ||
| 150 | BufferCache& buffer_cache; | ||
| 151 | TextureCache& texture_cache; | ||
| 152 | |||
| 153 | GraphicsPipelineCacheKey graphics_key{}; | ||
| 107 | 154 | ||
| 108 | std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache; | 155 | std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache; |
| 156 | std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache; | ||
| 157 | |||
| 158 | Shader::ObjectPool<Shader::IR::Inst> inst_pool; | ||
| 159 | Shader::ObjectPool<Shader::IR::Block> block_pool; | ||
| 160 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool; | ||
| 161 | |||
| 162 | Shader::Profile profile; | ||
| 109 | }; | 163 | }; |
| 110 | 164 | ||
| 111 | } // namespace Vulkan | 165 | } // namespace Vulkan |
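The static_asserts on GraphicsPipelineCacheKey are load-bearing: Hash() and operator== run over the key's raw bytes, which is only sound when equal keys are bit-identical, i.e. the type has no padding and no non-trivial members (the real key hashes Size() bytes rather than sizeof, since part of FixedPipelineState's dynamic-state tail may be unused). A minimal sketch of the same byte-hashing idea, with std::hash over a byte view standing in for CityHash64:

    #include <cstddef>
    #include <functional>
    #include <string_view>
    #include <type_traits>

    template <class Key>
    std::size_t HashKeyBytes(const Key& key) noexcept {
        static_assert(std::has_unique_object_representations_v<Key>,
                      "padding bytes would make equal keys hash differently");
        static_assert(std::is_trivially_copyable_v<Key>);
        return std::hash<std::string_view>{}(
            std::string_view{reinterpret_cast<const char*>(&key), sizeof key});
    }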
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c94419d29..036b531b9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 141 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 141 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 142 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, | 142 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, |
| 143 | memory_allocator), | 143 | memory_allocator), |
| 144 | texture_cache_runtime{device, scheduler, memory_allocator, | 144 | render_pass_cache(device), texture_cache_runtime{device, scheduler, |
| 145 | staging_pool, blit_image, astc_decoder_pass}, | 145 | memory_allocator, staging_pool, |
| 146 | blit_image, astc_decoder_pass, | ||
| 147 | render_pass_cache}, | ||
| 146 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 148 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 147 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 149 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 148 | update_descriptor_queue, descriptor_pool), | 150 | update_descriptor_queue, descriptor_pool), |
| 149 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 151 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 150 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 152 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 151 | descriptor_pool, update_descriptor_queue), | 153 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, |
| 152 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | 154 | texture_cache), |
| 155 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | ||
| 153 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 156 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 154 | wfi_event(device.GetLogical().CreateEvent()) { | 157 | wfi_event(device.GetLogical().CreateEvent()) { |
| 155 | scheduler.SetQueryCache(query_cache); | 158 | scheduler.SetQueryCache(query_cache); |
| @@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 158 | RasterizerVulkan::~RasterizerVulkan() = default; | 161 | RasterizerVulkan::~RasterizerVulkan() = default; |
| 159 | 162 | ||
| 160 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | 163 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { |
| 161 | UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); | 164 | MICROPROFILE_SCOPE(Vulkan_Drawing); |
| 165 | |||
| 166 | SCOPE_EXIT({ gpu.TickWork(); }); | ||
| 167 | FlushWork(); | ||
| 168 | |||
| 169 | query_cache.UpdateCounters(); | ||
| 170 | |||
| 171 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; | ||
| 172 | if (!pipeline) { | ||
| 173 | return; | ||
| 174 | } | ||
| 175 | update_descriptor_queue.Acquire(); | ||
| 176 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 177 | pipeline->Configure(is_indexed); | ||
| 178 | |||
| 179 | BeginTransformFeedback(); | ||
| 180 | |||
| 181 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); | ||
| 182 | UpdateDynamicStates(); | ||
| 183 | |||
| 184 | const auto& regs{maxwell3d.regs}; | ||
| 185 | const u32 num_instances{maxwell3d.mme_draw.instance_count}; | ||
| 186 | const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; | ||
| 187 | scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { | ||
| 188 | if (draw_params.is_indexed) { | ||
| 189 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||
| 190 | draw_params.base_vertex, draw_params.base_instance); | ||
| 191 | } else { | ||
| 192 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||
| 193 | draw_params.base_vertex, draw_params.base_instance); | ||
| 194 | } | ||
| 195 | }); | ||
| 196 | EndTransformFeedback(); | ||
| 162 | } | 197 | } |
| 163 | 198 | ||
| 164 | void RasterizerVulkan::Clear() { | 199 | void RasterizerVulkan::Clear() { |
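`Draw` now records through the scheduler with a by-value lambda capture, so the worker thread that replays the command buffer never touches `maxwell3d` state that may have moved on by then. `MakeDrawParams` is defined elsewhere in this file; inferred from the fields the lambda reads, the payload plausibly looks like this (the exact layout is an assumption):

```cpp
#include <cstdint>

using u32 = std::uint32_t;  // yuzu's common alias

// Plausible shape of the captured payload, reconstructed from the members the
// recorded lambda accesses; the real DrawParams/MakeDrawParams live earlier in
// vk_rasterizer.cpp and may name or order fields differently.
struct DrawParams {
    u32 base_instance;
    u32 num_instances;
    u32 base_vertex;
    u32 num_vertices;
    bool is_indexed;
};
```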
| @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { | |||
| 487 | if ((++draw_counter & 7) != 7) { | 522 | if ((++draw_counter & 7) != 7) { |
| 488 | return; | 523 | return; |
| 489 | } | 524 | } |
| 490 | |||
| 491 | if (draw_counter < DRAWS_TO_DISPATCH) { | 525 | if (draw_counter < DRAWS_TO_DISPATCH) { |
| 492 | // Send recorded tasks to the worker thread | 526 | // Send recorded tasks to the worker thread |
| 493 | scheduler.DispatchWork(); | 527 | scheduler.DispatchWork(); |
| 494 | return; | 528 | return; |
| 495 | } | 529 | } |
| 496 | |||
| 497 | // Otherwise (every certain number of draws) flush execution. | 530 | // Otherwise (every certain number of draws) flush execution. |
| 498 | // This submits commands to the Vulkan driver. | 531 | // This submits commands to the Vulkan driver. |
| 499 | scheduler.Flush(); | 532 | scheduler.Flush(); |
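The tightened `FlushWork` batches in two stages: `(++draw_counter & 7) != 7` filters out seven of every eight draws, cheap `DispatchWork()` calls then hand recorded tasks to the worker thread until the counter reaches `DRAWS_TO_DISPATCH`, and only then does the scheduler submit to the driver. A standalone sketch of that cadence; the constant's value and the counter reset are assumptions, since both live outside this hunk:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint32_t DRAWS_TO_DISPATCH = 4096;  // assumed value
    std::uint32_t draw_counter = 0;
    for (std::uint32_t draw = 0; draw < 10'000; ++draw) {
        if ((++draw_counter & 7) != 7) {
            continue;  // seven of every eight draws return immediately
        }
        if (draw_counter < DRAWS_TO_DISPATCH) {
            std::puts("DispatchWork(): queue recorded tasks on the worker thread");
            continue;
        }
        std::puts("Flush(): submit commands to the Vulkan driver");
        draw_counter = 0;  // reset assumed to happen on flush
    }
}
```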
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b915..88dbd753b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 28 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| @@ -148,6 +149,7 @@ private: | |||
| 148 | VKUpdateDescriptorQueue update_descriptor_queue; | 149 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 149 | BlitImageHelper blit_image; | 150 | BlitImageHelper blit_image; |
| 150 | ASTCDecoderPass astc_decoder_pass; | 151 | ASTCDecoderPass astc_decoder_pass; |
| 152 | RenderPassCache render_pass_cache; | ||
| 151 | 153 | ||
| 152 | TextureCacheRuntime texture_cache_runtime; | 154 | TextureCacheRuntime texture_cache_runtime; |
| 153 | TextureCache texture_cache; | 155 | TextureCache texture_cache; |
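Note where `render_pass_cache` lands in the member list: C++ initializes non-static members in declaration order, and `TextureCacheRuntime` (see the vk_texture_cache.h hunk below) holds a reference to the cache, so the cache must be declared, and therefore constructed, before the runtime. A reduced illustration with stand-in names:

```cpp
// Names here are local to this sketch, not yuzu's.
struct RenderPassCacheStub {};

struct RuntimeStub {
    RenderPassCacheStub& cache;  // borrowed reference, must outlive RuntimeStub
};

struct RasterizerStub {
    RenderPassCacheStub render_pass_cache;   // declared first, constructed first
    RuntimeStub runtime{render_pass_cache};  // safe: target is already initialized
};

int main() {
    RasterizerStub rasterizer;  // members initialize strictly in declaration order
    (void)rasterizer;
}
```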
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..7e5ae43ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include <boost/container/static_vector.hpp> | ||
| 10 | |||
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | namespace { | ||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | |||
| 21 | constexpr std::array ATTACHMENT_REFERENCES{ | ||
| 22 | VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 23 | VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 24 | VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 25 | VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 26 | VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 27 | VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 28 | VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 29 | VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 30 | VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 31 | }; | ||
| 32 | |||
| 33 | VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, | ||
| 34 | VkSampleCountFlagBits samples) { | ||
| 35 | using MaxwellToVK::SurfaceFormat; | ||
| 36 | return { | ||
| 37 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 38 | .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, | ||
| 39 | .samples = samples, | ||
| 40 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 41 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 42 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 43 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 44 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 45 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 46 | }; | ||
| 47 | } | ||
| 48 | } // Anonymous namespace | ||
| 49 | |||
| 50 | RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} | ||
| 51 | |||
| 52 | VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { | ||
| 53 | const auto [pair, is_new] = cache.try_emplace(key); | ||
| 54 | if (!is_new) { | ||
| 55 | return *pair->second; | ||
| 56 | } | ||
| 57 | boost::container::static_vector<VkAttachmentDescription, 9> descriptions; | ||
| 58 | u32 num_images{0}; | ||
| 59 | |||
| 60 | for (size_t index = 0; index < key.color_formats.size(); ++index) { | ||
| 61 | const PixelFormat format{key.color_formats[index]}; | ||
| 62 | if (format == PixelFormat::Invalid) { | ||
| 63 | continue; | ||
| 64 | } | ||
| 65 | descriptions.push_back(AttachmentDescription(*device, format, key.samples)); | ||
| 66 | ++num_images; | ||
| 67 | } | ||
| 68 | const size_t num_colors{descriptions.size()}; | ||
| 69 | const VkAttachmentReference* depth_attachment{}; | ||
| 70 | if (key.depth_format != PixelFormat::Invalid) { | ||
| 71 | depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; | ||
| 72 | descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); | ||
| 73 | } | ||
| 74 | const VkSubpassDescription subpass{ | ||
| 75 | .flags = 0, | ||
| 76 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 77 | .inputAttachmentCount = 0, | ||
| 78 | .pInputAttachments = nullptr, | ||
| 79 | .colorAttachmentCount = static_cast<u32>(num_colors), | ||
| 80 | .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, | ||
| 81 | .pResolveAttachments = nullptr, | ||
| 82 | .pDepthStencilAttachment = depth_attachment, | ||
| 83 | .preserveAttachmentCount = 0, | ||
| 84 | .pPreserveAttachments = nullptr, | ||
| 85 | }; | ||
| 86 | pair->second = device->GetLogical().CreateRenderPass({ | ||
| 87 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 88 | .pNext = nullptr, | ||
| 89 | .flags = 0, | ||
| 90 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 91 | .pAttachments = descriptions.data(), | ||
| 92 | .subpassCount = 1, | ||
| 93 | .pSubpasses = &subpass, | ||
| 94 | .dependencyCount = 0, | ||
| 95 | .pDependencies = nullptr, | ||
| 96 | }); | ||
| 97 | return *pair->second; | ||
| 98 | } | ||
| 99 | |||
| 100 | } // namespace Vulkan | ||
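From a caller's perspective the cache is a pure memoizer: build a fully populated key, call `Get`, and identical keys return the identical `VkRenderPass` handle. Every unused color slot must be set to `PixelFormat::Invalid` explicitly, since the defaulted comparison and the hash read all eight entries. A hedged sketch; `device` is assumed to be a live `Vulkan::Device`, and the two format enumerators are illustrative guesses:

```cpp
RenderPassCache render_pass_cache{device};  // 'device' owned by the caller

RenderPassKey key{};
key.color_formats.fill(VideoCore::Surface::PixelFormat::Invalid);
key.color_formats[0] = VideoCore::Surface::PixelFormat::B8G8R8A8_UNORM;  // assumed enumerator
key.depth_format = VideoCore::Surface::PixelFormat::D32_FLOAT;           // assumed enumerator
key.samples = VK_SAMPLE_COUNT_1_BIT;

const VkRenderPass first = render_pass_cache.Get(key);   // miss: creates and caches
const VkRenderPass second = render_pass_cache.Get(key);  // hit: same handle, no Vulkan call
```

Because every attachment is loaded and stored in `VK_IMAGE_LAYOUT_GENERAL`, render pass compatibility collapses to formats plus sample count, which is exactly what the key captures.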
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..db8e83f1a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "video_core/surface.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 11 | |||
| 12 | namespace Vulkan { | ||
| 13 | |||
| 14 | struct RenderPassKey { | ||
| 15 | auto operator<=>(const RenderPassKey&) const noexcept = default; | ||
| 16 | |||
| 17 | std::array<VideoCore::Surface::PixelFormat, 8> color_formats; | ||
| 18 | VideoCore::Surface::PixelFormat depth_format; | ||
| 19 | VkSampleCountFlagBits samples; | ||
| 20 | }; | ||
| 21 | |||
| 22 | } // namespace Vulkan | ||
| 23 | |||
| 24 | namespace std { | ||
| 25 | template <> | ||
| 26 | struct hash<Vulkan::RenderPassKey> { | ||
| 27 | [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { | ||
| 28 | size_t value = static_cast<size_t>(key.depth_format) << 48; | ||
| 29 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 30 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 31 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 32 | } | ||
| 33 | return value; | ||
| 34 | } | ||
| 35 | }; | ||
| 36 | } // namespace std | ||
| 37 | |||
| 38 | namespace Vulkan { | ||
| 39 | |||
| 40 | class Device; | ||
| 41 | |||
| 42 | class RenderPassCache { | ||
| 43 | public: | ||
| 44 | explicit RenderPassCache(const Device& device_); | ||
| 45 | |||
| 46 | VkRenderPass Get(const RenderPassKey& key); | ||
| 47 | |||
| 48 | private: | ||
| 49 | const Device* device{}; | ||
| 50 | std::unordered_map<RenderPassKey, vk::RenderPass> cache; | ||
| 51 | }; | ||
| 52 | |||
| 53 | } // namespace Vulkan | ||
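`std::unordered_map` needs two things from this key type: an equality operator, which the defaulted `operator<=>` also generates, and the `std::hash` specialization, which packs each field into mostly disjoint bit ranges before XORing. The same pattern in a freestanding, compilable form:

```cpp
#include <compare>
#include <cstddef>
#include <unordered_map>

struct Key {
    auto operator<=>(const Key&) const noexcept = default;  // also generates operator==
    int a;
    int b;
};

template <>
struct std::hash<Key> {
    size_t operator()(const Key& key) const noexcept {
        // Same trick as RenderPassKey: XOR fields shifted into disjoint bit ranges.
        return (static_cast<size_t>(key.a) << 32) ^ static_cast<size_t>(key.b);
    }
};

int main() {
    std::unordered_map<Key, int> map;
    map.try_emplace(Key{1, 2}, 7);          // hashes with std::hash<Key>
    return map.at(Key{1, 2}) == 7 ? 0 : 1;  // finds via hash, confirms via ==
}
```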
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..1bbc542a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 19 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 19 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 20 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 21 | #include "video_core/vulkan_common/vulkan_device.h" | 22 | #include "video_core/vulkan_common/vulkan_device.h" |
| 22 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 23 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 23 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 24 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; | |||
| 34 | using VideoCore::Surface::IsPixelFormatASTC; | 35 | using VideoCore::Surface::IsPixelFormatASTC; |
| 35 | 36 | ||
| 36 | namespace { | 37 | namespace { |
| 37 | |||
| 38 | constexpr std::array ATTACHMENT_REFERENCES{ | ||
| 39 | VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 40 | VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 41 | VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 42 | VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 43 | VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 44 | VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 45 | VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 46 | VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 47 | VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 48 | }; | ||
| 49 | |||
| 50 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | 38 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { |
| 51 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | 39 | if (color == std::array<float, 4>{0, 0, 0, 0}) { |
| 52 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | 40 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; |
| @@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 226 | } | 214 | } |
| 227 | } | 215 | } |
| 228 | 216 | ||
| 229 | [[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, | ||
| 230 | const ImageView* image_view) { | ||
| 231 | using MaxwellToVK::SurfaceFormat; | ||
| 232 | const PixelFormat pixel_format = image_view->format; | ||
| 233 | return VkAttachmentDescription{ | ||
| 234 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 235 | .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, | ||
| 236 | .samples = image_view->Samples(), | ||
| 237 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 238 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 239 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 240 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 241 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 242 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 243 | }; | ||
| 244 | } | ||
| 245 | |||
| 246 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { | 217 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { |
| 247 | switch (swizzle) { | 218 | switch (swizzle) { |
| 248 | case SwizzleSource::Zero: | 219 | case SwizzleSource::Zero: |
| @@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t | |||
| 1164 | 1135 | ||
| 1165 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, | 1136 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1166 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | 1137 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { |
| 1167 | std::vector<VkAttachmentDescription> descriptions; | ||
| 1168 | std::vector<VkImageView> attachments; | 1138 | std::vector<VkImageView> attachments; |
| 1169 | RenderPassKey renderpass_key{}; | 1139 | RenderPassKey renderpass_key{}; |
| 1170 | s32 num_layers = 1; | 1140 | s32 num_layers = 1; |
| @@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1175 | renderpass_key.color_formats[index] = PixelFormat::Invalid; | 1145 | renderpass_key.color_formats[index] = PixelFormat::Invalid; |
| 1176 | continue; | 1146 | continue; |
| 1177 | } | 1147 | } |
| 1178 | descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); | ||
| 1179 | attachments.push_back(color_buffer->RenderTarget()); | 1148 | attachments.push_back(color_buffer->RenderTarget()); |
| 1180 | renderpass_key.color_formats[index] = color_buffer->format; | 1149 | renderpass_key.color_formats[index] = color_buffer->format; |
| 1181 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); | 1150 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); |
| @@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1185 | ++num_images; | 1154 | ++num_images; |
| 1186 | } | 1155 | } |
| 1187 | const size_t num_colors = attachments.size(); | 1156 | const size_t num_colors = attachments.size(); |
| 1188 | const VkAttachmentReference* depth_attachment = | ||
| 1189 | depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; | ||
| 1190 | if (depth_buffer) { | 1157 | if (depth_buffer) { |
| 1191 | descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); | ||
| 1192 | attachments.push_back(depth_buffer->RenderTarget()); | 1158 | attachments.push_back(depth_buffer->RenderTarget()); |
| 1193 | renderpass_key.depth_format = depth_buffer->format; | 1159 | renderpass_key.depth_format = depth_buffer->format; |
| 1194 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); | 1160 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); |
| @@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1201 | } | 1167 | } |
| 1202 | renderpass_key.samples = samples; | 1168 | renderpass_key.samples = samples; |
| 1203 | 1169 | ||
| 1204 | const auto& device = runtime.device.GetLogical(); | 1170 | renderpass = runtime.render_pass_cache.Get(renderpass_key); |
| 1205 | const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); | 1171 | |
| 1206 | if (is_new) { | ||
| 1207 | const VkSubpassDescription subpass{ | ||
| 1208 | .flags = 0, | ||
| 1209 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 1210 | .inputAttachmentCount = 0, | ||
| 1211 | .pInputAttachments = nullptr, | ||
| 1212 | .colorAttachmentCount = static_cast<u32>(num_colors), | ||
| 1213 | .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, | ||
| 1214 | .pResolveAttachments = nullptr, | ||
| 1215 | .pDepthStencilAttachment = depth_attachment, | ||
| 1216 | .preserveAttachmentCount = 0, | ||
| 1217 | .pPreserveAttachments = nullptr, | ||
| 1218 | }; | ||
| 1219 | cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ | ||
| 1220 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 1221 | .pNext = nullptr, | ||
| 1222 | .flags = 0, | ||
| 1223 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 1224 | .pAttachments = descriptions.data(), | ||
| 1225 | .subpassCount = 1, | ||
| 1226 | .pSubpasses = &subpass, | ||
| 1227 | .dependencyCount = 0, | ||
| 1228 | .pDependencies = nullptr, | ||
| 1229 | }); | ||
| 1230 | } | ||
| 1231 | renderpass = *cache_pair->second; | ||
| 1232 | render_area = VkExtent2D{ | 1172 | render_area = VkExtent2D{ |
| 1233 | .width = key.size.width, | 1173 | .width = key.size.width, |
| 1234 | .height = key.size.height, | 1174 | .height = key.size.height, |
| 1235 | }; | 1175 | }; |
| 1236 | num_color_buffers = static_cast<u32>(num_colors); | 1176 | num_color_buffers = static_cast<u32>(num_colors); |
| 1237 | framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ | 1177 | framebuffer = runtime.device.GetLogical().CreateFramebuffer({ |
| 1238 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | 1178 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, |
| 1239 | .pNext = nullptr, | 1179 | .pNext = nullptr, |
| 1240 | .flags = 0, | 1180 | .flags = 0, |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..189ee5a68 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -26,35 +26,10 @@ class Device; | |||
| 26 | class Image; | 26 | class Image; |
| 27 | class ImageView; | 27 | class ImageView; |
| 28 | class Framebuffer; | 28 | class Framebuffer; |
| 29 | class RenderPassCache; | ||
| 29 | class StagingBufferPool; | 30 | class StagingBufferPool; |
| 30 | class VKScheduler; | 31 | class VKScheduler; |
| 31 | 32 | ||
| 32 | struct RenderPassKey { | ||
| 33 | constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; | ||
| 34 | |||
| 35 | std::array<PixelFormat, NUM_RT> color_formats; | ||
| 36 | PixelFormat depth_format; | ||
| 37 | VkSampleCountFlagBits samples; | ||
| 38 | }; | ||
| 39 | |||
| 40 | } // namespace Vulkan | ||
| 41 | |||
| 42 | namespace std { | ||
| 43 | template <> | ||
| 44 | struct hash<Vulkan::RenderPassKey> { | ||
| 45 | [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { | ||
| 46 | size_t value = static_cast<size_t>(key.depth_format) << 48; | ||
| 47 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 48 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 49 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 50 | } | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | } // namespace std | ||
| 55 | |||
| 56 | namespace Vulkan { | ||
| 57 | |||
| 58 | struct TextureCacheRuntime { | 33 | struct TextureCacheRuntime { |
| 59 | const Device& device; | 34 | const Device& device; |
| 60 | VKScheduler& scheduler; | 35 | VKScheduler& scheduler; |
| @@ -62,7 +37,7 @@ struct TextureCacheRuntime { | |||
| 62 | StagingBufferPool& staging_buffer_pool; | 37 | StagingBufferPool& staging_buffer_pool; |
| 63 | BlitImageHelper& blit_image_helper; | 38 | BlitImageHelper& blit_image_helper; |
| 64 | ASTCDecoderPass& astc_decoder_pass; | 39 | ASTCDecoderPass& astc_decoder_pass; |
| 65 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; | 40 | RenderPassCache& render_pass_cache; |
| 66 | 41 | ||
| 67 | void Finish(); | 42 | void Finish(); |
| 68 | 43 | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4887d6fd9..f0e5b098c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
| 49 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 49 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| 50 | VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, | 50 | VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, |
| 51 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | 51 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, |
| 52 | VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, | ||
| 52 | #ifdef _WIN32 | 53 | #ifdef _WIN32 |
| 53 | VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, | 54 | VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, |
| 54 | #endif | 55 | #endif |
| @@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 312 | }; | 313 | }; |
| 313 | SetNext(next, host_query_reset); | 314 | SetNext(next, host_query_reset); |
| 314 | 315 | ||
| 316 | VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ | ||
| 317 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, | ||
| 318 | .pNext = nullptr, | ||
| 319 | .shaderDemoteToHelperInvocation = true, | ||
| 320 | }; | ||
| 321 | SetNext(next, demote); | ||
| 322 | |||
| 315 | VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; | 323 | VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; |
| 316 | if (is_float16_supported) { | 324 | if (is_float16_supported) { |
| 317 | float16_int8 = { | 325 | float16_int8 = { |
| @@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 597 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); | 605 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); |
| 598 | } | 606 | } |
| 599 | } | 607 | } |
| 608 | VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; | ||
| 609 | demote.sType = | ||
| 610 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; | ||
| 611 | demote.pNext = nullptr; | ||
| 612 | |||
| 600 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; | 613 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; |
| 601 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | 614 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; |
| 615 | robustness2.pNext = &demote; | ||
| 602 | 616 | ||
| 603 | VkPhysicalDeviceFeatures2KHR features2{}; | 617 | VkPhysicalDeviceFeatures2KHR features2{}; |
| 604 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | 618 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; |
| @@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 625 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | 639 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |
| 626 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, | 640 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, |
| 627 | "shaderStorageImageWriteWithoutFormat"), | 641 | "shaderStorageImageWriteWithoutFormat"), |
| 642 | std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), | ||
| 628 | std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), | 643 | std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), |
| 629 | std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), | 644 | std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), |
| 630 | std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), | 645 | std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), |
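Both vulkan_device.cpp hunks follow the standard two-step Vulkan feature dance: chain the EXT struct through `pNext` when querying support in `CheckSuitability`, and chain a second copy with the feature enabled when creating the device. Assembled query side, using the core 1.1 entry point; `physical` stands in for the `VkPhysicalDevice` the class holds, and a Vulkan header include is assumed:

```cpp
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
demote.sType =
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;

VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
robustness2.pNext = &demote;  // chain: features2 -> robustness2 -> demote

VkPhysicalDeviceFeatures2 features2{};
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = &robustness2;

vkGetPhysicalDeviceFeatures2(physical, &features2);
// demote.shaderDemoteToHelperInvocation now reports device support; the
// feature-check list in this hunk turns VK_FALSE into VK_ERROR_FEATURE_NOT_PRESENT.
```

Requiring the extension up front presumably lets the new SPIR-V backend map Maxwell's demote-style fragment kills directly onto demote-to-helper semantics instead of emulating them.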