Diffstat (limited to 'src')
 src/shader_recompiler/CMakeLists.txt                                              |    4
 src/shader_recompiler/backend/spirv/emit_context.cpp                              |   64
 src/shader_recompiler/backend/spirv/emit_context.h                                |   18
 src/shader_recompiler/backend/spirv/emit_spirv.cpp                                |   44
 src/shader_recompiler/backend/spirv/emit_spirv.h                                  |   18
 src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp                |   55
 src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp                   |   23
 src/shader_recompiler/environment.h                                               |   14
 src/shader_recompiler/frontend/ir/attribute.cpp                                   |    2
 src/shader_recompiler/frontend/ir/attribute.h                                     |    2
 src/shader_recompiler/frontend/ir/ir_emitter.cpp                                  |   14
 src/shader_recompiler/frontend/ir/ir_emitter.h                                    |    4
 src/shader_recompiler/frontend/ir/microinstruction.cpp                            |    3
 src/shader_recompiler/frontend/ir/opcodes.inc                                     |   11
 src/shader_recompiler/frontend/ir/program.h                                       |    2
 src/shader_recompiler/frontend/ir/reg.h                                           |    4
 src/shader_recompiler/frontend/maxwell/control_flow.cpp                           |   31
 src/shader_recompiler/frontend/maxwell/control_flow.h                             |    3
 src/shader_recompiler/frontend/maxwell/program.cpp                                |    1
 src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp                |   18
 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp                    |   15
 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp            |   43
 src/shader_recompiler/frontend/maxwell/translate/impl/impl.h                      |    4
 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp    |   86
 src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp         |   16
 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp           |    2
 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp  |    2
 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp                         |   60
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp                                 |    2
 src/shader_recompiler/program_header.h                                            |  143
 src/shader_recompiler/recompiler.cpp                                              |   28
 src/shader_recompiler/recompiler.h                                                |   20
 src/shader_recompiler/shader_info.h                                               |   10
 src/shader_recompiler/stage.h                                                     |   19
 src/video_core/CMakeLists.txt                                                     |    6
 src/video_core/renderer_vulkan/fixed_pipeline_state.cpp                           |    4
 src/video_core/renderer_vulkan/fixed_pipeline_state.h                             |    9
 src/video_core/renderer_vulkan/maxwell_to_vk.cpp                                  |   24
 src/video_core/renderer_vulkan/maxwell_to_vk.h                                    |    2
 src/video_core/renderer_vulkan/pipeline_helper.h                                  |  162
 src/video_core/renderer_vulkan/vk_compute_pipeline.cpp                            |  209
 src/video_core/renderer_vulkan/vk_compute_pipeline.h                              |    3
 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp                           |  445
 src/video_core/renderer_vulkan/vk_graphics_pipeline.h                             |   66
 src/video_core/renderer_vulkan/vk_pipeline.h                                      |   36
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp                              |  346
 src/video_core/renderer_vulkan/vk_pipeline_cache.h                                |   82
 src/video_core/renderer_vulkan/vk_rasterizer.cpp                                  |   47
 src/video_core/renderer_vulkan/vk_rasterizer.h                                    |    2
 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp                           |  100
 src/video_core/renderer_vulkan/vk_render_pass_cache.h                             |   53
 src/video_core/renderer_vulkan/vk_texture_cache.cpp                               |   68
 src/video_core/renderer_vulkan/vk_texture_cache.h                                 |   29
 src/video_core/vulkan_common/vulkan_device.cpp                                    |   15
 54 files changed, 1927 insertions(+), 566 deletions(-)
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index b870e9937..31c394106 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/common_funcs.h
     frontend/maxwell/translate/impl/condition_code_set.cpp
     frontend/maxwell/translate/impl/double_add.cpp
+    frontend/maxwell/translate/impl/exit_program.cpp
     frontend/maxwell/translate/impl/find_leading_one.cpp
     frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC
     ir_opt/texture_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
+    program_header.h
     profile.h
-    recompiler.cpp
-    recompiler.h
     shader_info.h
 )
 
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 204389d74..6c79b611b 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
     }
 }
 
-EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
+EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
     : Sirit::Module(0x00010000), profile{profile_} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
-    DefineSpecialVariables(program.info);
-
-    u32 binding{};
+    DefineInterfaces(program.info, program.stage);
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
-
     DefineLabels(program);
 }
 
@@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) {
         return Constant(F32[1], value.F32());
     case IR::Type::F64:
         return Constant(F64[1], value.F64());
+    case IR::Type::Label:
+        return value.Label()->Definition<Id>();
     default:
         throw NotImplementedException("Immediate type {}", value.Type());
     }
@@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) {
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
+    input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
+    output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
+
     if (info.uses_int8) {
         AddCapability(spv::Capability::Int8);
         U8 = Name(TypeInt(8, false), "u8");
@@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() {
     u32_zero_value = Constant(U32[1], 0U);
 }
 
-void EmitContext::DefineSpecialVariables(const Info& info) {
-    const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) {
-        const Id pointer_type{TypePointer(storage_class, type)};
-        const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)};
-        Decorate(id, spv::Decoration::BuiltIn, builtin);
-        return id;
-    }};
+void EmitContext::DefineInterfaces(const Info& info, Stage stage) {
+    const auto define{
+        [this](Id type, std::optional<spv::BuiltIn> builtin, spv::StorageClass storage_class) {
+            const Id pointer_type{TypePointer(storage_class, type)};
+            const Id id{AddGlobalVariable(pointer_type, storage_class)};
+            if (builtin) {
+                Decorate(id, spv::Decoration::BuiltIn, *builtin);
+            }
+            interfaces.push_back(id);
+            return id;
+        }};
     using namespace std::placeholders;
     const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)};
+    const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)};
 
     if (info.uses_workgroup_id) {
         workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId);
@@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) {
     if (info.uses_local_invocation_id) {
         local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId);
     }
+    if (info.loads_position) {
+        const bool is_fragment{stage == Stage::Fragment};
+        const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position};
+        input_position = define_input(F32[4], built_in);
+    }
+    for (size_t i = 0; i < info.loads_generics.size(); ++i) {
+        if (info.loads_generics[i]) {
+            // FIXME: Declare size from input
+            input_generics[i] = define_input(F32[4], std::nullopt);
+            Decorate(input_generics[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(input_generics[i], fmt::format("in_attr{}", i));
+        }
+    }
+    if (info.stores_position) {
+        output_position = define_output(F32[4], spv::BuiltIn::Position);
+    }
+    for (size_t i = 0; i < info.stores_generics.size(); ++i) {
+        if (info.stores_generics[i]) {
+            output_generics[i] = define_output(F32[4], std::nullopt);
+            Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(output_generics[i], fmt::format("out_attr{}", i));
+        }
+    }
+    if (stage == Stage::Fragment) {
+        for (size_t i = 0; i < 8; ++i) {
+            if (!info.stores_frag_color[i]) {
+                continue;
+            }
+            frag_color[i] = define_output(F32[4], std::nullopt);
+            Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i));
+            Name(frag_color[i], fmt::format("frag_color{}", i));
+        }
+    }
 }
 
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
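
A note on the define/std::bind pattern introduced in DefineInterfaces above: one lambda taking an optional built-in decoration is specialized into input and output helpers by binding the storage-class argument. A minimal self-contained sketch of the same pattern, with int and a small enum as stand-ins for yuzu's Id and spv types:

#include <functional>
#include <iostream>
#include <optional>

enum class StorageClass { Input, Output };

int main() {
    const auto define{[](int type, std::optional<int> builtin, StorageClass storage_class) {
        std::cout << "type=" << type << " builtin=" << (builtin ? *builtin : -1) << " storage="
                  << (storage_class == StorageClass::Input ? "Input" : "Output") << '\n';
    }};
    using namespace std::placeholders;
    const auto define_input{std::bind(define, _1, _2, StorageClass::Input)};
    const auto define_output{std::bind(define, _1, _2, StorageClass::Output)};
    define_input(4, 7);             // a built-in input, e.g. a position
    define_output(4, std::nullopt); // a plain output with no built-in decoration
}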
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 35eca258a..2d7961ac3 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -46,7 +46,7 @@ struct UniformDefinitions {
 
 class EmitContext final : public Sirit::Module {
 public:
-    explicit EmitContext(const Profile& profile, IR::Program& program);
+    explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
     ~EmitContext();
 
     [[nodiscard]] Id Def(const IR::Value& value);
@@ -71,6 +71,9 @@ public:
 
     UniformDefinitions uniform_types;
 
+    Id input_f32{};
+    Id output_f32{};
+
     Id storage_u32{};
 
     std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
@@ -80,10 +83,21 @@ public:
     Id workgroup_id{};
     Id local_invocation_id{};
 
+    Id input_position{};
+    std::array<Id, 32> input_generics{};
+
+    Id output_position{};
+    std::array<Id, 32> output_generics{};
+
+    std::array<Id, 8> frag_color{};
+    Id frag_depth{};
+
+    std::vector<Id> interfaces;
+
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
-    void DefineSpecialVariables(const Info& info);
+    void DefineInterfaces(const Info& info, Stage stage);
     void DefineConstantBuffers(const Info& info, u32& binding);
     void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
                                Id type, char type_char, u32 element_size);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 50c0f7243..b8978b94a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
         return arg.U32();
     } else if constexpr (std::is_same_v<ArgType, IR::Block*>) {
         return arg.Label();
+    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
+        return arg.Attribute();
     }
 }
 
@@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) {
 }
 } // Anonymous namespace
 
-std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) {
-    EmitContext ctx{profile, program};
+std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program,
+                           u32& binding) {
+    EmitContext ctx{profile, program, binding};
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)};
     for (IR::Block* const block : program.blocks) {
@@ -208,28 +211,41 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
     }
     }
     ctx.OpFunctionEnd();
-    boost::container::small_vector<Id, 32> interfaces;
-    const Info& info{program.info};
-    if (info.uses_workgroup_id) {
-        interfaces.push_back(ctx.workgroup_id);
+
+    const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
+    spv::ExecutionModel execution_model{};
+    switch (env.ShaderStage()) {
+    case Shader::Stage::Compute: {
+        const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
+        execution_model = spv::ExecutionModel::GLCompute;
+        ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0],
+                             workgroup_size[1], workgroup_size[2]);
+        break;
     }
-    if (info.uses_local_invocation_id) {
-        interfaces.push_back(ctx.local_invocation_id);
+    case Shader::Stage::VertexB:
+        execution_model = spv::ExecutionModel::Vertex;
+        break;
+    case Shader::Stage::Fragment:
+        execution_model = spv::ExecutionModel::Fragment;
+        ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft);
+        break;
+    default:
+        throw NotImplementedException("Stage {}", env.ShaderStage());
     }
-    const std::span interfaces_span(interfaces.data(), interfaces.size());
-    ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span);
-
-    const std::array<u32, 3> workgroup_size{env.WorkgroupSize()};
-    ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1],
-                         workgroup_size[2]);
+    ctx.AddEntryPoint(execution_model, func, "main", interfaces);
 
     SetupDenormControl(profile, program, ctx, func);
+    const Info& info{program.info};
     if (info.uses_sampled_1d) {
         ctx.AddCapability(spv::Capability::Sampled1D);
     }
     if (info.uses_sparse_residency) {
         ctx.AddCapability(spv::Capability::SparseResidency);
     }
+    if (info.uses_demote_to_helper_invocation) {
+        ctx.AddExtension("SPV_EXT_demote_to_helper_invocation");
+        ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
+    }
     // TODO: Track this usage
     ctx.AddCapability(spv::Capability::ImageGatherExtended);
 
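
The entry-point logic above now derives the SPIR-V execution model from the environment's stage instead of hardcoding GLCompute. A condensed sketch of that mapping, with stand-in enums for Shader::Stage and spv::ExecutionModel (VertexB is the Maxwell B-stage vertex shader):

#include <stdexcept>

enum class Stage { VertexB, Fragment, Compute };
enum class ExecutionModel { Vertex, Fragment, GLCompute };

ExecutionModel ExecutionModelOf(Stage stage) {
    switch (stage) {
    case Stage::Compute:
        return ExecutionModel::GLCompute; // also declares LocalSize from env.WorkgroupSize()
    case Stage::VertexB:
        return ExecutionModel::Vertex;
    case Stage::Fragment:
        return ExecutionModel::Fragment; // also declares OriginUpperLeft
    }
    throw std::runtime_error("unhandled stage");
}

int main() {
    return ExecutionModelOf(Stage::Fragment) == ExecutionModel::Fragment ? 0 : 1;
}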
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 89566c83d..ae121f534 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -16,18 +16,18 @@
 namespace Shader::Backend::SPIRV {
 
 [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env,
-                                         IR::Program& program);
+                                         IR::Program& program, u32& binding);
 
 // Microinstruction emitters
 Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
 void EmitVoid(EmitContext& ctx);
 Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
-void EmitBranch(EmitContext& ctx, IR::Block* label);
-void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                           IR::Block* false_label);
-void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label);
-void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label);
+void EmitBranch(EmitContext& ctx, Id label);
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
 void EmitReturn(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
 void EmitGetRegister(EmitContext& ctx);
 void EmitSetRegister(EmitContext& ctx);
 void EmitGetPred(EmitContext& ctx);
@@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
-void EmitGetAttribute(EmitContext& ctx);
-void EmitSetAttribute(EmitContext& ctx);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
 void EmitGetAttributeIndexed(EmitContext& ctx);
 void EmitSetAttributeIndexed(EmitContext& ctx);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
 void EmitGetZFlag(EmitContext& ctx);
 void EmitGetSFlag(EmitContext& ctx);
 void EmitGetCFlag(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 125b58cf7..02d115740 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -5,6 +5,43 @@
5#include "shader_recompiler/backend/spirv/emit_spirv.h" 5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6 6
7namespace Shader::Backend::SPIRV { 7namespace Shader::Backend::SPIRV {
8namespace {
9Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
10 const u32 element{static_cast<u32>(attr) % 4};
11 const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
12 if (IR::IsGeneric(attr)) {
13 const u32 index{IR::GenericAttributeIndex(attr)};
14 return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id());
15 }
16 switch (attr) {
17 case IR::Attribute::PositionX:
18 case IR::Attribute::PositionY:
19 case IR::Attribute::PositionZ:
20 case IR::Attribute::PositionW:
21 return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id());
22 default:
23 throw NotImplementedException("Read attribute {}", attr);
24 }
25}
26
27Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
28 const u32 element{static_cast<u32>(attr) % 4};
29 const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
30 if (IR::IsGeneric(attr)) {
31 const u32 index{IR::GenericAttributeIndex(attr)};
32 return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id());
33 }
34 switch (attr) {
35 case IR::Attribute::PositionX:
36 case IR::Attribute::PositionY:
37 case IR::Attribute::PositionZ:
38 case IR::Attribute::PositionW:
39 return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id());
40 default:
41 throw NotImplementedException("Read attribute {}", attr);
42 }
43}
44} // Anonymous namespace
8 45
9void EmitGetRegister(EmitContext&) { 46void EmitGetRegister(EmitContext&) {
10 throw NotImplementedException("SPIR-V Instruction"); 47 throw NotImplementedException("SPIR-V Instruction");
@@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
     return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset);
 }
 
-void EmitGetAttribute(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {
+    return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr));
 }
 
-void EmitSetAttribute(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
+    ctx.OpStore(OutputAttrPointer(ctx, attr), value);
 }
 
 void EmitGetAttributeIndexed(EmitContext&) {
@@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
+    const Id component_id{ctx.Constant(ctx.U32[1], component)};
+    const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)};
+    ctx.OpStore(pointer, value);
+}
+
+void EmitSetFragDepth(EmitContext& ctx, Id value) {
+    ctx.OpStore(ctx.frag_depth, value);
+}
+
 void EmitGetZFlag(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
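
InputAttrPointer/OutputAttrPointer above rely on the attribute enumeration packing four components per location, so the component is `static_cast<u32>(attr) % 4` and the location comes from GenericAttributeIndex. A worked sketch of that arithmetic; the base value 0x40 is hypothetical, it only needs to be 4-aligned, which the real enum satisfies:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t generic0_x = 0x40; // hypothetical, 4-aligned base value
    for (std::uint32_t attr = generic0_x; attr < generic0_x + 8; ++attr) {
        const std::uint32_t index = (attr - generic0_x) / 4; // generic location
        const std::uint32_t element = attr % 4;              // X=0, Y=1, Z=2, W=3
        std::printf("attr=0x%02X -> in_attr%u element %u\n", static_cast<unsigned>(attr),
                    static_cast<unsigned>(index), static_cast<unsigned>(element));
    }
}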
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index 48755b827..6b81f0169 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -6,26 +6,29 @@
 
 namespace Shader::Backend::SPIRV {
 
-void EmitBranch(EmitContext& ctx, IR::Block* label) {
-    ctx.OpBranch(label->Definition<Id>());
+void EmitBranch(EmitContext& ctx, Id label) {
+    ctx.OpBranch(label);
 }
 
-void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label,
-                           IR::Block* false_label) {
-    ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>());
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) {
+    ctx.OpBranchConditional(condition, true_label, false_label);
 }
 
-void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) {
-    ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(),
-                    spv::LoopControlMask::MaskNone);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) {
+    ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
 }
 
-void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) {
-    ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label) {
+    ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
 }
 
 void EmitReturn(EmitContext& ctx) {
     ctx.OpReturn();
 }
 
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) {
+    ctx.OpDemoteToHelperInvocationEXT();
+    ctx.OpBranch(continue_label);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0fcb68050..1fcaa56dd 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -3,6 +3,8 @@
 #include <array>
 
 #include "common/common_types.h"
+#include "shader_recompiler/stage.h"
+#include "shader_recompiler/program_header.h"
 
 namespace Shader {
 
@@ -15,6 +17,18 @@ public:
     [[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
 
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
+
+    [[nodiscard]] const ProgramHeader& SPH() const noexcept {
+        return sph;
+    }
+
+    [[nodiscard]] Stage ShaderStage() const noexcept {
+        return stage;
+    }
+
+protected:
+    ProgramHeader sph{};
+    Stage stage{};
 };
 
 } // namespace Shader
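
The new protected sph/stage members define a contract: a concrete environment fills them in so SPH() and ShaderStage() can be plain non-virtual accessors. A stand-alone sketch of that shape; the types and the FragmentEnvironment name are stand-ins, not yuzu's:

#include <cstdio>

enum class Stage { VertexB, Fragment, Compute };
struct ProgramHeader {}; // stand-in for the real SPH layout

class Environment {
public:
    virtual ~Environment() = default;

    const ProgramHeader& SPH() const noexcept { return sph; }
    Stage ShaderStage() const noexcept { return stage; }

protected:
    ProgramHeader sph{};
    Stage stage{};
};

// Hypothetical subclass: fills the protected members from guest state.
class FragmentEnvironment final : public Environment {
public:
    FragmentEnvironment() { stage = Stage::Fragment; }
};

int main() {
    FragmentEnvironment env;
    std::printf("is fragment: %d\n", env.ShaderStage() == Stage::Fragment);
}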
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 2fb7d576f..4811242ea 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept {
     return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
 }
 
-int GenericAttributeIndex(Attribute attribute) {
+u32 GenericAttributeIndex(Attribute attribute) {
     if (!IsGeneric(attribute)) {
         throw InvalidArgument("Attribute is not generic {}", attribute);
     }
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
index bb2cad6af..34ec7e0cd 100644
--- a/src/shader_recompiler/frontend/ir/attribute.h
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -224,7 +224,7 @@ enum class Attribute : u64 {
 
 [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
 
-[[nodiscard]] int GenericAttributeIndex(Attribute attribute);
+[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
 
 [[nodiscard]] std::string NameOf(Attribute attribute);
 
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 958282160..672836c0b 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -82,6 +82,12 @@ void IREmitter::Return() {
     Inst(Opcode::Return);
 }
 
+void IREmitter::DemoteToHelperInvocation(Block* continue_label) {
+    block->SetBranch(continue_label);
+    continue_label->AddImmediatePredecessor(block);
+    Inst(Opcode::DemoteToHelperInvocation, continue_label);
+}
+
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
@@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
 
+void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
+    Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
+}
+
+void IREmitter::SetFragDepth(const F32& value) {
+    Inst(Opcode::SetFragDepth, value);
+}
+
 U32 IREmitter::WorkgroupIdX() {
     return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 05263fe8b..72af5db37 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -36,6 +36,7 @@ public:
     void LoopMerge(Block* merge_block, Block* continue_target);
     void SelectionMerge(Block* merge_block);
     void Return();
+    void DemoteToHelperInvocation(Block* continue_label);
 
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
@@ -67,6 +68,9 @@ public:
     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
     void SetAttribute(IR::Attribute attribute, const F32& value);
 
+    void SetFragColor(u32 index, u32 component, const F32& value);
+    void SetFragDepth(const F32& value);
+
     [[nodiscard]] U32 WorkgroupIdX();
     [[nodiscard]] U32 WorkgroupIdY();
     [[nodiscard]] U32 WorkgroupIdZ();
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 5946105d2..21b7d8a9f 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::LoopMerge:
     case Opcode::SelectionMerge:
     case Opcode::Return:
+    case Opcode::DemoteToHelperInvocation:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
+    case Opcode::SetFragColor:
+    case Opcode::SetFragDepth:
     case Opcode::WriteGlobalU8:
     case Opcode::WriteGlobalS8:
     case Opcode::WriteGlobalU16:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 9052a4903..593faca52 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1,
 OPCODE(LoopMerge, Void, Label, Label, )
 OPCODE(SelectionMerge, Void, Label, )
 OPCODE(Return, Void, )
+OPCODE(DemoteToHelperInvocation, Void, Label, )
 
 // Context getters/setters
 OPCODE(GetRegister, U32, Reg, )
@@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32,
 OPCODE(GetCbufU32, U32, U32, U32, )
 OPCODE(GetCbufF32, F32, U32, U32, )
 OPCODE(GetCbufU64, U64, U32, U32, )
-OPCODE(GetAttribute, U32, Attribute, )
-OPCODE(SetAttribute, Void, Attribute, U32, )
-OPCODE(GetAttributeIndexed, U32, U32, )
-OPCODE(SetAttributeIndexed, Void, U32, U32, )
+OPCODE(GetAttribute, F32, Attribute, )
+OPCODE(SetAttribute, Void, Attribute, F32, )
+OPCODE(GetAttributeIndexed, F32, U32, )
+OPCODE(SetAttributeIndexed, Void, U32, F32, )
+OPCODE(SetFragColor, Void, U32, U32, F32, )
+OPCODE(SetFragDepth, Void, F32, )
 OPCODE(GetZFlag, U1, Void, )
 OPCODE(GetSFlag, U1, Void, )
 OPCODE(GetCFlag, U1, Void, )
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index bce8b19b3..733513c8b 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -10,6 +10,7 @@
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/shader_info.h"
+#include "shader_recompiler/stage.h"
 
 namespace Shader::IR {
 
@@ -17,6 +18,7 @@ struct Program {
     BlockList blocks;
     BlockList post_order_blocks;
     Info info;
+    Stage stage{};
 };
 
 [[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 8fea05f7b..3845ec5fb 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256;
     return reg + (-num);
 }
 
-[[nodiscard]] constexpr Reg operator++(Reg& reg) {
+constexpr Reg operator++(Reg& reg) {
     reg = reg + 1;
     return reg;
 }
 
-[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
+constexpr Reg operator++(Reg& reg, int) {
     const Reg copy{reg};
     reg = reg + 1;
     return copy;
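
Dropping [[nodiscard]] here is deliberate: the increment operators are routinely used as plain statements where the returned Reg is intentionally unused (see `++src_reg;` in exit_program.cpp below). A minimal reproduction of why the attribute warned, with a stand-in Reg enum:

enum class Reg { R0, R1, R2, R3 };

constexpr Reg operator+(Reg reg, int num) {
    return static_cast<Reg>(static_cast<int>(reg) + num);
}

constexpr Reg operator++(Reg& reg) { // no [[nodiscard]]: result is often ignored
    reg = reg + 1;
    return reg;
}

int main() {
    Reg src_reg{Reg::R0};
    ++src_reg; // used as a statement; would trigger a warning under [[nodiscard]]
    return static_cast<int>(src_reg); // 1
}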
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index 715c0e92d..4f6707fae 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) {
     case Opcode::EXIT:
     case Opcode::JMP:
     case Opcode::JMX:
+    case Opcode::KIL:
     case Opcode::BRK:
     case Opcode::CONT:
     case Opcode::LONGJMP:
@@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         block->end = pc;
         return AnalysisState::Branch;
     }
+    case Opcode::KIL: {
+        const Predicate pred{inst.Pred()};
+        const auto ir_pred{static_cast<IR::Pred>(pred.index)};
+        const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
+        AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
+        return AnalysisState::Branch;
+    }
     case Opcode::PBK:
     case Opcode::PCNT:
     case Opcode::PEXIT:
@@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati
         return AnalysisState::Continue;
     }
     const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
-    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true);
+    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
     return AnalysisState::Branch;
 }
 
 void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
-                          EndClass insn_end_class, IR::Condition cond,
-                          bool visit_conditional_inst) {
+                          EndClass insn_end_class, IR::Condition cond) {
     if (block->begin != pc) {
         // If the block doesn't start in the conditional instruction
         // mark it as a label to visit it later
@@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
     // Impersonate the visited block with a virtual block
     *block = std::move(virtual_block);
     // Set the end properties of the conditional instruction
-    conditional_block->end = visit_conditional_inst ? (pc + 1) : pc;
+    conditional_block->end = pc + 1;
     conditional_block->end_class = insn_end_class;
     // Add a label to the instruction after the conditional instruction
     Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
     // Branch to the next instruction from the virtual block
     block->branch_false = endif_block;
-    // And branch to it from the conditional instruction if it is a branch
-    if (insn_end_class == EndClass::Branch) {
+    // And branch to it from the conditional instruction if it is a branch or a kill instruction
+    // Kill instructions are considered a branch because they demote to a helper invocation and
+    // execution may continue.
+    if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
         conditional_block->cond = IR::Condition{true};
         conditional_block->branch_true = endif_block;
         conditional_block->branch_false = nullptr;
@@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
             throw NotImplementedException("Conditional EXIT with PEXIT token");
         }
         const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
-        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false);
+        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
         return AnalysisState::Branch;
     }
     if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
@@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati
         block->branch_false = nullptr;
         return AnalysisState::Branch;
     }
-    block->end = pc;
+    block->end = pc + 1;
     block->end_class = EndClass::Exit;
     return AnalysisState::Branch;
 }
@@ -505,6 +514,12 @@ std::string CFG::Dot() const {
                                    node_uid);
                 ++node_uid;
                 break;
+            case EndClass::Kill:
+                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
+                dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
+                                   node_uid);
+                ++node_uid;
+                break;
             }
         }
         if (function.entrypoint == 8) {
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index fe74f210f..22f134194 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -29,6 +29,7 @@ enum class EndClass {
     Call,
     Exit,
     Return,
+    Kill,
 };
 
 enum class Token {
@@ -130,7 +131,7 @@ private:
     AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
 
     void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
-                         IR::Condition cond, bool visit_conditional_inst);
+                         IR::Condition cond);
 
     /// Return true when the branch instruction is confirmed to be a branch
     bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 8bfa64326..0074eb89b 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     IR::Program program;
     program.blocks = VisitAST(inst_pool, block_pool, env, cfg);
     program.post_order_blocks = PostOrder(program.blocks);
+    program.stage = env.ShaderStage();
     RemoveUnreachableBlocks(program);
 
     // Replace instructions before the SSA rewrite
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 5f5d9cf17..cec03e73e 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -45,6 +45,7 @@ enum class StatementType {
     Loop,
     Break,
     Return,
+    Kill,
     Function,
     Identity,
     Not,
@@ -70,6 +71,7 @@ struct If {};
 struct Loop {};
 struct Break {};
 struct Return {};
+struct Kill {};
 struct FunctionTag {};
 struct Identity {};
 struct Not {};
@@ -93,6 +95,7 @@ struct Statement : ListBaseHook {
     Statement(Break, Statement* cond_, Statement* up_)
         : cond{cond_}, up{up_}, type{StatementType::Break} {}
     Statement(Return) : type{StatementType::Return} {}
+    Statement(Kill) : type{StatementType::Kill} {}
     Statement(FunctionTag) : children{}, type{StatementType::Function} {}
     Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {}
     Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {}
@@ -174,6 +177,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) {
     case StatementType::Return:
         ret += fmt::format("{} return;\n", indent);
         break;
+    case StatementType::Kill:
+        ret += fmt::format("{} kill;\n", indent);
+        break;
     case StatementType::SetVariable:
         ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
         break;
@@ -424,6 +430,9 @@ private:
             gotos.push_back(root.insert(ip, *goto_stmt));
             break;
         }
+        case Flow::EndClass::Kill:
+            root.insert(ip, *pool.Create(Kill{}));
+            break;
         }
     }
 }
@@ -729,6 +738,15 @@ private:
             current_block = nullptr;
             break;
         }
+        case StatementType::Kill: {
+            if (!current_block) {
+                current_block = block_pool.Create(inst_pool);
+                block_list.push_back(current_block);
+            }
+            IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block);
+            current_block = nullptr;
+            break;
+        }
         default:
             throw NotImplementedException("Statement type {}", stmt.type);
         }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
deleted file mode 100644
index e98bbd0d1..000000000
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/common_types.h"
-#include "shader_recompiler/exception.h"
-#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
-
-namespace Shader::Maxwell {
-
-void TranslatorVisitor::EXIT(u64) {
-    ir.Exit();
-}
-
-} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..ea9b33da9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void ExitFragment(TranslatorVisitor& v) {
+    const ProgramHeader sph{v.env.SPH()};
+    IR::Reg src_reg{IR::Reg::R0};
+    for (u32 render_target = 0; render_target < 8; ++render_target) {
+        const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
+        for (u32 component = 0; component < 4; ++component) {
+            if (!mask[component]) {
+                continue;
+            }
+            v.ir.SetFragColor(render_target, component, v.F(src_reg));
+            ++src_reg;
+        }
+    }
+    if (sph.ps.omap.sample_mask != 0) {
+        throw NotImplementedException("Sample mask");
+    }
+    if (sph.ps.omap.depth != 0) {
+        throw NotImplementedException("Fragment depth");
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::EXIT() {
+    switch (env.ShaderStage()) {
+    case Stage::Fragment:
+        ExitFragment(*this);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Shader::Maxwell
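
For reference, the component masks in ExitFragment map enabled render-target components to consecutive source registers starting at R0. A small stand-alone model of that packing; the masks here are made up for illustration:

#include <array>
#include <cstdio>

int main() {
    // Pretend render target 0 writes RGBA and render target 1 writes only RG.
    const std::array<std::array<bool, 4>, 2> masks{{
        {true, true, true, true},
        {true, true, false, false},
    }};
    int src_reg = 0;
    for (std::size_t rt = 0; rt < masks.size(); ++rt) {
        for (std::size_t comp = 0; comp < 4; ++comp) {
            if (!masks[rt][comp]) {
                continue;
            }
            std::printf("frag_color%zu.%zu <- R%d\n", rt, comp, src_reg++);
        }
    }
}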
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index e3e298c3b..ed81d9c36 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -108,7 +108,7 @@ public:
     void DSETP_reg(u64 insn);
     void DSETP_cbuf(u64 insn);
     void DSETP_imm(u64 insn);
-    void EXIT(u64 insn);
+    void EXIT();
     void F2F_reg(u64 insn);
     void F2F_cbuf(u64 insn);
     void F2F_imm(u64 insn);
@@ -220,7 +220,7 @@ public:
     void JCAL(u64 insn);
     void JMP(u64 insn);
     void JMX(u64 insn);
-    void KIL(u64 insn);
+    void KIL();
     void LD(u64 insn);
     void LDC(u64 insn);
     void LDG(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
index ad97786d4..2922145ee 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -11,6 +11,13 @@
 
 namespace Shader::Maxwell {
 namespace {
+enum class Size : u64 {
+    B32,
+    B64,
+    B96,
+    B128,
+};
+
 enum class InterpolationMode : u64 {
     Pass,
     Multiply,
@@ -23,8 +30,85 @@ enum class SampleMode : u64 {
     Centroid,
     Offset,
 };
+
+int NumElements(Size size) {
+    switch (size) {
+    case Size::B32:
+        return 1;
+    case Size::B64:
+        return 2;
+    case Size::B96:
+        return 3;
+    case Size::B128:
+        return 4;
+    }
+    throw InvalidArgument("Invalid size {}", size);
+}
 } // Anonymous namespace
 
+void TranslatorVisitor::ALD(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> index_reg;
+        BitField<20, 10, u64> absolute_offset;
+        BitField<20, 11, s64> relative_offset;
+        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<32, 1, u64> o;
+        BitField<31, 1, u64> patch;
+        BitField<47, 2, Size> size;
+    } const ald{insn};
+
+    if (ald.o != 0) {
+        throw NotImplementedException("O");
+    }
+    if (ald.patch != 0) {
+        throw NotImplementedException("P");
+    }
+    if (ald.index_reg != IR::Reg::RZ) {
+        throw NotImplementedException("Indexed");
+    }
+    const u64 offset{ald.absolute_offset.Value()};
+    if (offset % 4 != 0) {
+        throw NotImplementedException("Unaligned absolute offset {}", offset);
+    }
+    const int num_elements{NumElements(ald.size)};
+    for (int element = 0; element < num_elements; ++element) {
+        F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element}));
+    }
+}
+
+void TranslatorVisitor::AST(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> src_reg;
+        BitField<8, 8, IR::Reg> index_reg;
+        BitField<20, 10, u64> absolute_offset;
+        BitField<20, 11, s64> relative_offset;
+        BitField<31, 1, u64> patch;
+        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<47, 2, Size> size;
+    } const ast{insn};
+
+    if (ast.patch != 0) {
+        throw NotImplementedException("P");
+    }
+    if (ast.stream_reg != IR::Reg::RZ) {
+        throw NotImplementedException("Stream store");
+    }
+    if (ast.index_reg != IR::Reg::RZ) {
+        throw NotImplementedException("Indexed store");
+    }
+    const u64 offset{ast.absolute_offset.Value()};
+    if (offset % 4 != 0) {
+        throw NotImplementedException("Unaligned absolute offset {}", offset);
+    }
+    const int num_elements{NumElements(ast.size)};
+    for (int element = 0; element < num_elements; ++element) {
+        ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element));
+    }
+}
+
 void TranslatorVisitor::IPA(u64 insn) {
     // IPA is the instruction used to read varyings from a fragment shader.
     // gl_FragCoord is mapped to the gl_Position attribute.
@@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) {
     // }
     const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
     if (is_indexed) {
-        throw NotImplementedException("IPA.IDX");
+        throw NotImplementedException("IDX");
     }
 
     const IR::Attribute attribute{ipa.attribute};
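
The ALD/AST decoders above use Common::BitField views over the 64-bit instruction word. An equivalent plain shift-and-mask sketch, with field positions taken straight from the BitField declarations (note that NumElements is simply the two-bit size field plus one):

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t insn = 0; // stand-in for a real ALD encoding
    const std::uint64_t dest_reg = (insn >> 0) & 0xff;          // BitField<0, 8>
    const std::uint64_t absolute_offset = (insn >> 20) & 0x3ff; // BitField<20, 10>
    const std::uint64_t size = (insn >> 47) & 0x3;              // BitField<47, 2>: B32..B128
    // The byte offset must be 4-aligned; offset / 4 selects the first
    // attribute word, and B32..B128 read size + 1 consecutive words.
    std::printf("R%llu <- attribute word %llu, %llu element(s)\n",
                static_cast<unsigned long long>(dest_reg),
                static_cast<unsigned long long>(absolute_offset / 4),
                static_cast<unsigned long long>(size + 1));
}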
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 9675cef54..59252bcc5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) {
     ThrowNotImplemented(Opcode::AL2P);
 }
 
-void TranslatorVisitor::ALD(u64) {
-    ThrowNotImplemented(Opcode::ALD);
-}
-
-void TranslatorVisitor::AST(u64) {
-    ThrowNotImplemented(Opcode::AST);
-}
-
 void TranslatorVisitor::ATOM_cas(u64) {
     ThrowNotImplemented(Opcode::ATOM_cas);
 }
@@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) {
     ThrowNotImplemented(Opcode::DSETP_imm);
 }
 
-void TranslatorVisitor::EXIT(u64) {
-    throw LogicError("Visting EXIT instruction");
-}
-
 void TranslatorVisitor::F2F_reg(u64) {
     ThrowNotImplemented(Opcode::F2F_reg);
 }
@@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) {
     ThrowNotImplemented(Opcode::JMX);
 }
 
-void TranslatorVisitor::KIL(u64) {
-    ThrowNotImplemented(Opcode::KIL);
+void TranslatorVisitor::KIL() {
+    // KIL is a no-op
 }
 
 void TranslatorVisitor::LD(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index 98d9f4c64..0fbb87ec4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) {
         BitField<36, 13, u64> cbuf_offset;
     } const tex{insn};
 
-    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
+    Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
 }
 
 void TranslatorVisitor::TEX_b(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
index ac1615b00..54f0df754 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
70 70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) { 71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding texs{insn}; 72 const Encoding texs{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))}; 73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
74 const IR::F32 zero{v.ir.Imm32(0.0f)}; 74 const IR::F32 zero{v.ir.Imm32(0.0f)};
75 const IR::Reg reg_a{texs.src_reg_a}; 75 const IR::Reg reg_a{texs.src_reg_a};
76 const IR::Reg reg_b{texs.src_reg_b}; 76 const IR::Reg reg_b{texs.src_reg_b};
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 708b6b267..fbbe28632 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
17 return; 17 return;
18 } 18 }
19 info.constant_buffer_mask |= 1U << index; 19 info.constant_buffer_mask |= 1U << index;
20 info.constant_buffer_descriptors.push_back({ 20
21 .index{index}, 21 auto& cbufs{info.constant_buffer_descriptors};
22 .count{1}, 22 cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
23 }); 23 ConstantBufferDescriptor{
24 .index{index},
25 .count{1},
26 });
27}
28
29void GetAttribute(Info& info, IR::Attribute attribute) {
30 if (IR::IsGeneric(attribute)) {
31 info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true;
32 return;
33 }
34 switch (attribute) {
35 case IR::Attribute::PositionX:
36 case IR::Attribute::PositionY:
37 case IR::Attribute::PositionZ:
38 case IR::Attribute::PositionW:
39 info.loads_position = true;
40 break;
41 default:
42 throw NotImplementedException("Get attribute {}", attribute);
43 }
44}
45
46void SetAttribute(Info& info, IR::Attribute attribute) {
47 if (IR::IsGeneric(attribute)) {
48 info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true;
49 return;
50 }
51 switch (attribute) {
52 case IR::Attribute::PositionX:
53 case IR::Attribute::PositionY:
54 case IR::Attribute::PositionZ:
55 case IR::Attribute::PositionW:
56 info.stores_position = true;
57 break;
58 default:
59 throw NotImplementedException("Set attribute {}", attribute);
60 }
24} 61}
25 62
26void VisitUsages(Info& info, IR::Inst& inst) { 63void VisitUsages(Info& info, IR::Inst& inst) {
@@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) {
162 break; 199 break;
163 } 200 }
164 switch (inst.Opcode()) { 201 switch (inst.Opcode()) {
202 case IR::Opcode::DemoteToHelperInvocation:
203 info.uses_demote_to_helper_invocation = true;
204 break;
205 case IR::Opcode::GetAttribute:
206 GetAttribute(info, inst.Arg(0).Attribute());
207 break;
208 case IR::Opcode::SetAttribute:
209 SetAttribute(info, inst.Arg(0).Attribute());
210 break;
211 case IR::Opcode::SetFragColor:
212 info.stores_frag_color[inst.Arg(0).U32()] = true;
213 break;
214 case IR::Opcode::SetFragDepth:
215 info.stores_frag_depth = true;
216 break;
165 case IR::Opcode::WorkgroupId: 217 case IR::Opcode::WorkgroupId:
166 info.uses_workgroup_id = true; 218 info.uses_workgroup_id = true;
167 break; 219 break;
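The lower_bound insertion above keeps constant_buffer_descriptors ordered by index as descriptors are discovered, instead of appending and sorting later. A self-contained sketch of the same pattern (the types here are stand-ins, not the project's):

    #include <algorithm>
    #include <vector>

    struct Desc {
        unsigned index;
        unsigned count;
    };

    // std::ranges::lower_bound with a member projection finds the first element whose
    // index is not less than the new one; inserting there preserves the ordering.
    void InsertSorted(std::vector<Desc>& descs, unsigned index) {
        const auto it = std::ranges::lower_bound(descs, index, {}, &Desc::index);
        descs.insert(it, Desc{.index = index, .count = 1});
    }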
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index d09bcec36..bab7ca186 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -169,7 +169,7 @@ private:
169 const size_t num_args{phi.NumArgs()}; 169 const size_t num_args{phi.NumArgs()};
170 for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { 170 for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
171 const IR::Value& op{phi.Arg(arg_index)}; 171 const IR::Value& op{phi.Arg(arg_index)};
172 if (op == same || op == IR::Value{&phi}) { 172 if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
173 // Unique value or self-reference 173 // Unique value or self-reference
174 continue; 174 continue;
175 } 175 }
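The Resolve() calls matter because a phi argument may be an alias of another value rather than the value itself; comparing unresolved values would make such a phi look non-trivial. A self-contained illustration of identity-chain resolution (Node is a stand-in, not the project's IR::Value):

    // Follows alias links until reaching the underlying definition, so two distinct
    // wrappers of the same value compare equal after resolution.
    struct Node {
        const Node* identity_of{nullptr}; // non-null when this node merely aliases another
    };

    const Node* Resolve(const Node* node) {
        while (node->identity_of != nullptr) {
            node = node->identity_of;
        }
        return node;
    }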
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h
new file mode 100644
index 000000000..1544bfa42
--- /dev/null
+++ b/src/shader_recompiler/program_header.h
@@ -0,0 +1,143 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Shader {
15
16enum class OutputTopology : u32 {
17 PointList = 1,
18 LineStrip = 6,
19 TriangleStrip = 7,
20};
21
22enum class PixelImap : u8 {
23 Unused = 0,
24 Constant = 1,
25 Perspective = 2,
26 ScreenLinear = 3,
27};
28
29// Documentation in:
30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
31struct ProgramHeader {
32 union {
33 BitField<0, 5, u32> sph_type;
34 BitField<5, 5, u32> version;
35 BitField<10, 4, u32> shader_type;
36 BitField<14, 1, u32> mrt_enable;
37 BitField<15, 1, u32> kills_pixels;
38 BitField<16, 1, u32> does_global_store;
39 BitField<17, 4, u32> sass_version;
40 BitField<21, 5, u32> reserved;
41 BitField<26, 1, u32> does_load_or_store;
42 BitField<27, 1, u32> does_fp64;
43 BitField<28, 4, u32> stream_out_mask;
44 } common0;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_low_size;
48 BitField<24, 8, u32> per_patch_attribute_count;
49 } common1;
50
51 union {
52 BitField<0, 24, u32> shader_local_memory_high_size;
53 BitField<24, 8, u32> threads_per_input_primitive;
54 } common2;
55
56 union {
57 BitField<0, 24, u32> shader_local_memory_crs_size;
58 BitField<24, 4, OutputTopology> output_topology;
59 BitField<28, 4, u32> reserved;
60 } common3;
61
62 union {
63 BitField<0, 12, u32> max_output_vertices;
64 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
65 BitField<20, 4, u32> reserved;
66 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
67 } common4;
68
69 union {
70 struct {
71 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
72 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
73 INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
74 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
75 union {
76 BitField<0, 8, u16> clip_distances;
77 BitField<8, 1, u16> point_sprite_s;
78 BitField<9, 1, u16> point_sprite_t;
79 BitField<10, 1, u16> fog_coordinate;
80 BitField<12, 1, u16> tessellation_eval_point_u;
81 BitField<13, 1, u16> tessellation_eval_point_v;
82 BitField<14, 1, u16> instance_id;
83 BitField<15, 1, u16> vertex_id;
84 };
85 INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
86 INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
87 INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
88 INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
89 INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
90 INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
91 INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
92 INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
93 INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
94 } vtg;
95
96 struct {
97 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
98 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
99
100 union {
101 BitField<0, 2, PixelImap> x;
102 BitField<2, 2, PixelImap> y;
103 BitField<4, 2, PixelImap> z;
104 BitField<6, 2, PixelImap> w;
105 u8 raw;
106 } imap_generic_vector[32];
107
108 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
109 INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
110 INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
111 INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
112
113 struct {
114 u32 target;
115 union {
116 BitField<0, 1, u32> sample_mask;
117 BitField<1, 1, u32> depth;
118 BitField<2, 30, u32> reserved;
119 };
120 } omap;
121
122 [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept {
123 const u32 bits{omap.target >> (rt * 4)};
124 return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
125 }
126
127 [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const {
128 const auto& vector{imap_generic_vector[attribute]};
129 return {vector.x, vector.y, vector.z, vector.w};
130 }
131 } ps;
132
133 std::array<u32, 0xf> raw;
134 };
135
136 [[nodiscard]] u64 LocalMemorySize() const noexcept {
137 return (common1.shader_local_memory_low_size |
138 (common2.shader_local_memory_high_size << 24));
139 }
140};
141static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size");
142
143} // namespace Shader
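LocalMemorySize() composes the byte count from two 24-bit fields: the low size occupies bits 0..23 and the high size is shifted up by 24. A worked example with made-up field values, mirroring that composition (helper name is illustrative):

    #include <cstdint>

    // Both inputs are 24-bit field values; the high part lands in bits 24..47.
    constexpr std::uint64_t ComposeLocalMemorySize(std::uint64_t low24, std::uint64_t high24) {
        return low24 | (high24 << 24);
    }
    static_assert(ComposeLocalMemorySize(0x10, 0x1) == 0x1000010);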
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
deleted file mode 100644
index 527e19c27..000000000
--- a/src/shader_recompiler/recompiler.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "common/common_types.h"
8#include "shader_recompiler/backend/spirv/emit_spirv.h"
9#include "shader_recompiler/environment.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/frontend/maxwell/program.h"
12#include "shader_recompiler/object_pool.h"
13#include "shader_recompiler/recompiler.h"
14
15namespace Shader {
16
17std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env,
18 u32 start_address) {
19 ObjectPool<Maxwell::Flow::Block> flow_block_pool;
20 ObjectPool<IR::Inst> inst_pool;
21 ObjectPool<IR::Block> block_pool;
22
23 Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
24 IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
25 return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)};
26}
27
28} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h
deleted file mode 100644
index 2529463ae..000000000
--- a/src/shader_recompiler/recompiler.h
+++ /dev/null
@@ -1,20 +0,0 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/profile.h"
13#include "shader_recompiler/shader_info.h"
14
15namespace Shader {
16
17[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile,
18 Environment& env, u32 start_address);
19
20} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index adc1d9a64..6eff762e2 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -56,6 +56,15 @@ struct Info {
56 56
57 bool uses_workgroup_id{}; 57 bool uses_workgroup_id{};
58 bool uses_local_invocation_id{}; 58 bool uses_local_invocation_id{};
59
60 std::array<bool, 32> loads_generics{};
61 bool loads_position{};
62
63 std::array<bool, 8> stores_frag_color{};
64 bool stores_frag_depth{};
65 std::array<bool, 32> stores_generics{};
66 bool stores_position{};
67
59 bool uses_fp16{}; 68 bool uses_fp16{};
60 bool uses_fp64{}; 69 bool uses_fp64{};
61 bool uses_fp16_denorms_flush{}; 70 bool uses_fp16_denorms_flush{};
@@ -68,6 +77,7 @@ struct Info {
68 bool uses_image_1d{}; 77 bool uses_image_1d{};
69 bool uses_sampled_1d{}; 78 bool uses_sampled_1d{};
70 bool uses_sparse_residency{}; 79 bool uses_sparse_residency{};
80 bool uses_demote_to_helper_invocation{};
71 81
72 IR::Type used_constant_buffer_types{}; 82 IR::Type used_constant_buffer_types{};
73 83
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 000000000..fc6ce6043
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,19 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Shader {
8
9enum class Stage {
10 Compute,
11 VertexA,
12 VertexB,
13 TessellationControl,
14 TessellationEval,
15 Geometry,
16 Fragment,
17};
18
19} // namespace Shader
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3323e6916..71b07c194 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -100,6 +100,7 @@ add_library(video_core STATIC
100 renderer_vulkan/fixed_pipeline_state.h 100 renderer_vulkan/fixed_pipeline_state.h
101 renderer_vulkan/maxwell_to_vk.cpp 101 renderer_vulkan/maxwell_to_vk.cpp
102 renderer_vulkan/maxwell_to_vk.h 102 renderer_vulkan/maxwell_to_vk.h
103 renderer_vulkan/pipeline_helper.h
103 renderer_vulkan/renderer_vulkan.h 104 renderer_vulkan/renderer_vulkan.h
104 renderer_vulkan/renderer_vulkan.cpp 105 renderer_vulkan/renderer_vulkan.cpp
105 renderer_vulkan/vk_blit_screen.cpp 106 renderer_vulkan/vk_blit_screen.cpp
@@ -116,15 +117,18 @@ add_library(video_core STATIC
116 renderer_vulkan/vk_descriptor_pool.h 117 renderer_vulkan/vk_descriptor_pool.h
117 renderer_vulkan/vk_fence_manager.cpp 118 renderer_vulkan/vk_fence_manager.cpp
118 renderer_vulkan/vk_fence_manager.h 119 renderer_vulkan/vk_fence_manager.h
120 renderer_vulkan/vk_graphics_pipeline.cpp
121 renderer_vulkan/vk_graphics_pipeline.h
119 renderer_vulkan/vk_master_semaphore.cpp 122 renderer_vulkan/vk_master_semaphore.cpp
120 renderer_vulkan/vk_master_semaphore.h 123 renderer_vulkan/vk_master_semaphore.h
121 renderer_vulkan/vk_pipeline_cache.cpp 124 renderer_vulkan/vk_pipeline_cache.cpp
122 renderer_vulkan/vk_pipeline_cache.h 125 renderer_vulkan/vk_pipeline_cache.h
123 renderer_vulkan/vk_pipeline.h
124 renderer_vulkan/vk_query_cache.cpp 126 renderer_vulkan/vk_query_cache.cpp
125 renderer_vulkan/vk_query_cache.h 127 renderer_vulkan/vk_query_cache.h
126 renderer_vulkan/vk_rasterizer.cpp 128 renderer_vulkan/vk_rasterizer.cpp
127 renderer_vulkan/vk_rasterizer.h 129 renderer_vulkan/vk_rasterizer.h
130 renderer_vulkan/vk_render_pass_cache.cpp
131 renderer_vulkan/vk_render_pass_cache.h
128 renderer_vulkan/vk_resource_pool.cpp 132 renderer_vulkan/vk_resource_pool.cpp
129 renderer_vulkan/vk_resource_pool.h 133 renderer_vulkan/vk_resource_pool.h
130 renderer_vulkan/vk_scheduler.cpp 134 renderer_vulkan/vk_scheduler.cpp
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d8f683907 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
72 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; 72 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
73 alpha_test_func.Assign(PackComparisonOp(test_func)); 73 alpha_test_func.Assign(PackComparisonOp(test_func));
74 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); 74 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
75 depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
76 depth_format.Assign(static_cast<u32>(regs.zeta.format));
77 std::ranges::transform(regs.rt, color_formats.begin(),
78 [](const auto& rt) { return static_cast<u8>(rt.format); });
75 79
76 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); 80 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
77 point_size = Common::BitCast<u32>(regs.point_size); 81 point_size = Common::BitCast<u32>(regs.point_size);
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..348f1d6ce 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -60,7 +60,7 @@ struct FixedPipelineState {
60 60
61 void Refresh(const Maxwell& regs, size_t index); 61 void Refresh(const Maxwell& regs, size_t index);
62 62
63 constexpr std::array<bool, 4> Mask() const noexcept { 63 std::array<bool, 4> Mask() const noexcept {
64 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; 64 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
65 } 65 }
66 66
@@ -97,11 +97,11 @@ struct FixedPipelineState {
97 BitField<20, 3, u32> type; 97 BitField<20, 3, u32> type;
98 BitField<23, 6, u32> size; 98 BitField<23, 6, u32> size;
99 99
100 constexpr Maxwell::VertexAttribute::Type Type() const noexcept { 100 Maxwell::VertexAttribute::Type Type() const noexcept {
101 return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); 101 return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
102 } 102 }
103 103
104 constexpr Maxwell::VertexAttribute::Size Size() const noexcept { 104 Maxwell::VertexAttribute::Size Size() const noexcept {
105 return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); 105 return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
106 } 106 }
107 }; 107 };
@@ -187,7 +187,10 @@ struct FixedPipelineState {
187 u32 raw2; 187 u32 raw2;
188 BitField<0, 3, u32> alpha_test_func; 188 BitField<0, 3, u32> alpha_test_func;
189 BitField<3, 1, u32> early_z; 189 BitField<3, 1, u32> early_z;
190 BitField<4, 1, u32> depth_enabled;
191 BitField<5, 5, u32> depth_format;
190 }; 192 };
193 std::array<u8, Maxwell::NumRenderTargets> color_formats;
191 194
192 u32 alpha_test_ref; 195 u32 alpha_test_ref;
193 u32 point_size; 196 u32 point_size;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..dc4ff0da2 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
741 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; 741 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
742} 742}
743 743
744VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
745 switch (msaa_mode) {
746 case Tegra::Texture::MsaaMode::Msaa1x1:
747 return VK_SAMPLE_COUNT_1_BIT;
748 case Tegra::Texture::MsaaMode::Msaa2x1:
749 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
750 return VK_SAMPLE_COUNT_2_BIT;
751 case Tegra::Texture::MsaaMode::Msaa2x2:
752 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
753 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
754 return VK_SAMPLE_COUNT_4_BIT;
755 case Tegra::Texture::MsaaMode::Msaa4x2:
756 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
757 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
758 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
759 return VK_SAMPLE_COUNT_8_BIT;
760 case Tegra::Texture::MsaaMode::Msaa4x4:
761 return VK_SAMPLE_COUNT_16_BIT;
762 default:
763 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
764 return VK_SAMPLE_COUNT_1_BIT;
765 }
766}
767
744} // namespace Vulkan::MaxwellToVK 768} // namespace Vulkan::MaxwellToVK
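MsaaMode() collapses Tegra's WxH sample layouts to their sample totals, which is all Vulkan needs: 2x1 maps to 2 samples, 2x2 to 4, 4x2 to 8, and 4x4 to 16. Usage sketch inside the renderer, where the Tegra and Vulkan headers above are available:

    // A 2x2 layout carries 4 samples per pixel:
    const VkSampleCountFlagBits samples{MaxwellToVK::MsaaMode(Tegra::Texture::MsaaMode::Msaa2x2)};
    // samples == VK_SAMPLE_COUNT_4_BIT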
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..9f78e15b6 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
71 71
72VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); 72VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
73 73
74VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
75
74} // namespace Vulkan::MaxwellToVK 76} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 000000000..0a59aa659
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,162 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include <boost/container/small_vector.hpp>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/renderer_vulkan/vk_texture_cache.h"
15#include "video_core/renderer_vulkan/vk_update_descriptor.h"
16#include "video_core/texture_cache/texture_cache.h"
17#include "video_core/texture_cache/types.h"
18#include "video_core/textures/texture.h"
19
20namespace Vulkan {
21
22struct TextureHandle {
23 explicit TextureHandle(u32 data, bool via_header_index) {
24 [[likely]] if (via_header_index) {
25 image = data;
26 sampler = data;
27 } else {
28 const Tegra::Texture::TextureHandle handle{data};
29 image = handle.tic_id;
30            sampler = handle.tsc_id.Value(); // via_header_index is known false on this branch
31 }
32 }
33
34 u32 image;
35 u32 sampler;
36};
37
38struct DescriptorLayoutTuple {
39 vk::DescriptorSetLayout descriptor_set_layout;
40 vk::PipelineLayout pipeline_layout;
41 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
42};
43
44class DescriptorLayoutBuilder {
45public:
46 DescriptorLayoutTuple Create(const vk::Device& device) {
47 DescriptorLayoutTuple result;
48 if (!bindings.empty()) {
49 result.descriptor_set_layout = device.CreateDescriptorSetLayout({
50 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
51 .pNext = nullptr,
52 .flags = 0,
53 .bindingCount = static_cast<u32>(bindings.size()),
54 .pBindings = bindings.data(),
55 });
56 }
57 result.pipeline_layout = device.CreatePipelineLayout({
58 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
59 .pNext = nullptr,
60 .flags = 0,
61 .setLayoutCount = result.descriptor_set_layout ? 1U : 0U,
62 .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(),
63 .pushConstantRangeCount = 0,
64 .pPushConstantRanges = nullptr,
65 });
66 if (!entries.empty()) {
67 result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({
68 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
69 .pNext = nullptr,
70 .flags = 0,
71 .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
72 .pDescriptorUpdateEntries = entries.data(),
73 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
74 .descriptorSetLayout = *result.descriptor_set_layout,
75 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
76 .pipelineLayout = *result.pipeline_layout,
77 .set = 0,
78 });
79 }
80 return result;
81 }
82
83 void Add(const Shader::Info& info, VkShaderStageFlags stage) {
84 for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
85 Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage);
86 }
87 for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
88 Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage);
89 }
90 for ([[maybe_unused]] const auto& desc : info.texture_descriptors) {
91 Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage);
92 }
93 }
94
95private:
96 void Add(VkDescriptorType type, VkShaderStageFlags stage) {
97 bindings.push_back({
98 .binding = binding,
99 .descriptorType = type,
100 .descriptorCount = 1,
101 .stageFlags = stage,
102 .pImmutableSamplers = nullptr,
103 });
104 entries.push_back(VkDescriptorUpdateTemplateEntryKHR{
105 .dstBinding = binding,
106 .dstArrayElement = 0,
107 .descriptorCount = 1,
108 .descriptorType = type,
109 .offset = offset,
110 .stride = sizeof(DescriptorUpdateEntry),
111 });
112 ++binding;
113 offset += sizeof(DescriptorUpdateEntry);
114 }
115
116 boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
117 boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
118 u32 binding{};
119 size_t offset{};
120};
121
122inline VideoCommon::ImageViewType CastType(Shader::TextureType type) {
123 switch (type) {
124 case Shader::TextureType::Color1D:
125 case Shader::TextureType::Shadow1D:
126 return VideoCommon::ImageViewType::e1D;
127 case Shader::TextureType::ColorArray1D:
128 case Shader::TextureType::ShadowArray1D:
129 return VideoCommon::ImageViewType::e1DArray;
130 case Shader::TextureType::Color2D:
131 case Shader::TextureType::Shadow2D:
132 return VideoCommon::ImageViewType::e2D;
133 case Shader::TextureType::ColorArray2D:
134 case Shader::TextureType::ShadowArray2D:
135 return VideoCommon::ImageViewType::e2DArray;
136 case Shader::TextureType::Color3D:
137 case Shader::TextureType::Shadow3D:
138 return VideoCommon::ImageViewType::e3D;
139 case Shader::TextureType::ColorCube:
140 case Shader::TextureType::ShadowCube:
141 return VideoCommon::ImageViewType::Cube;
142 case Shader::TextureType::ColorArrayCube:
143 case Shader::TextureType::ShadowArrayCube:
144 return VideoCommon::ImageViewType::CubeArray;
145 }
146 UNREACHABLE_MSG("Invalid texture type {}", type);
147 return {};
148}
149
150inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers,
151 const ImageId* image_view_ids, TextureCache& texture_cache,
152 VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) {
153 for (const auto& desc : info.texture_descriptors) {
154 const VkSampler sampler{samplers[index]};
155 ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
156 const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
157 update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
158 ++index;
159 }
160}
161
162} // namespace Vulkan
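DescriptorLayoutBuilder centralizes the binding/entry bookkeeping that the compute and graphics pipelines would otherwise duplicate: each Add() appends the uniform-buffer, storage-buffer, and combined-image-sampler bindings for one stage, and Create() emits the three Vulkan objects in one pass. Usage sketch; `device`, `vertex_info`, and `fragment_info` are stand-ins for the caller's own objects:

    DescriptorLayoutBuilder builder;
    builder.Add(vertex_info, VK_SHADER_STAGE_VERTEX_BIT);
    builder.Add(fragment_info, VK_SHADER_STAGE_FRAGMENT_BIT);
    DescriptorLayoutTuple tuple{builder.Create(device.GetLogical())};
    // tuple now owns the descriptor set layout, pipeline layout, and update template.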
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index ef8bef6ff..6684d37a6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -6,6 +6,7 @@
6 6
7#include <boost/container/small_vector.hpp> 7#include <boost/container/small_vector.hpp>
8 8
9#include "video_core/renderer_vulkan/pipeline_helper.h"
9#include "video_core/renderer_vulkan/vk_buffer_cache.h" 10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
10#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 11#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
11#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 12#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
@@ -17,140 +18,10 @@
17 18
18namespace Vulkan { 19namespace Vulkan {
19namespace { 20namespace {
20vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { 21DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) {
21 boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings; 22 DescriptorLayoutBuilder builder;
22 u32 binding{}; 23 builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
23 for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { 24 return builder.Create(device.GetLogical());
24 bindings.push_back({
25 .binding = binding,
26 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
27 .descriptorCount = 1,
28 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
29 .pImmutableSamplers = nullptr,
30 });
31 ++binding;
32 }
33 for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
34 bindings.push_back({
35 .binding = binding,
36 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
37 .descriptorCount = 1,
38 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
39 .pImmutableSamplers = nullptr,
40 });
41 ++binding;
42 }
43 for (const auto& desc : info.texture_descriptors) {
44 bindings.push_back({
45 .binding = binding,
46 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
47 .descriptorCount = 1,
48 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
49 .pImmutableSamplers = nullptr,
50 });
51 ++binding;
52 }
53 return device.GetLogical().CreateDescriptorSetLayout({
54 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
55 .pNext = nullptr,
56 .flags = 0,
57 .bindingCount = static_cast<u32>(bindings.size()),
58 .pBindings = bindings.data(),
59 });
60}
61
62vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
63 const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
64 VkPipelineLayout pipeline_layout) {
65 boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;
66 size_t offset{};
67 u32 binding{};
68 for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
69 entries.push_back({
70 .dstBinding = binding,
71 .dstArrayElement = 0,
72 .descriptorCount = 1,
73 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
74 .offset = offset,
75 .stride = sizeof(DescriptorUpdateEntry),
76 });
77 ++binding;
78 offset += sizeof(DescriptorUpdateEntry);
79 }
80 for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
81 entries.push_back({
82 .dstBinding = binding,
83 .dstArrayElement = 0,
84 .descriptorCount = 1,
85 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
86 .offset = offset,
87 .stride = sizeof(DescriptorUpdateEntry),
88 });
89 ++binding;
90 offset += sizeof(DescriptorUpdateEntry);
91 }
92 for (const auto& desc : info.texture_descriptors) {
93 entries.push_back({
94 .dstBinding = binding,
95 .dstArrayElement = 0,
96 .descriptorCount = 1,
97 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
98 .offset = offset,
99 .stride = sizeof(DescriptorUpdateEntry),
100 });
101 ++binding;
102 offset += sizeof(DescriptorUpdateEntry);
103 }
104 return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
105 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
106 .pNext = nullptr,
107 .flags = 0,
108 .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
109 .pDescriptorUpdateEntries = entries.data(),
110 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
111 .descriptorSetLayout = descriptor_set_layout,
112 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
113 .pipelineLayout = pipeline_layout,
114 .set = 0,
115 });
116}
117
118struct TextureHandle {
119 explicit TextureHandle(u32 data, bool via_header_index) {
120 const Tegra::Texture::TextureHandle handle{data};
121 image = handle.tic_id;
122 sampler = via_header_index ? image : handle.tsc_id.Value();
123 }
124
125 u32 image;
126 u32 sampler;
127};
128
129VideoCommon::ImageViewType CastType(Shader::TextureType type) {
130 switch (type) {
131 case Shader::TextureType::Color1D:
132 case Shader::TextureType::Shadow1D:
133 return VideoCommon::ImageViewType::e1D;
134 case Shader::TextureType::ColorArray1D:
135 case Shader::TextureType::ShadowArray1D:
136 return VideoCommon::ImageViewType::e1DArray;
137 case Shader::TextureType::Color2D:
138 case Shader::TextureType::Shadow2D:
139 return VideoCommon::ImageViewType::e2D;
140 case Shader::TextureType::ColorArray2D:
141 case Shader::TextureType::ShadowArray2D:
142 return VideoCommon::ImageViewType::e2DArray;
143 case Shader::TextureType::Color3D:
144 case Shader::TextureType::Shadow3D:
145 return VideoCommon::ImageViewType::e3D;
146 case Shader::TextureType::ColorCube:
147 case Shader::TextureType::ShadowCube:
148 return VideoCommon::ImageViewType::Cube;
149 case Shader::TextureType::ColorArrayCube:
150 case Shader::TextureType::ShadowArrayCube:
151 return VideoCommon::ImageViewType::CubeArray;
152 }
153 UNREACHABLE_MSG("Invalid texture type {}", type);
154} 25}
155} // Anonymous namespace 26} // Anonymous namespace
156 27
@@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
158 VKUpdateDescriptorQueue& update_descriptor_queue_, 29 VKUpdateDescriptorQueue& update_descriptor_queue_,
159 const Shader::Info& info_, vk::ShaderModule spv_module_) 30 const Shader::Info& info_, vk::ShaderModule spv_module_)
160 : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, 31 : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
161 spv_module(std::move(spv_module_)), 32 spv_module(std::move(spv_module_)) {
162 descriptor_set_layout(CreateDescriptorSetLayout(device, info)), 33 DescriptorLayoutTuple tuple{CreateLayout(device, info)};
163 descriptor_allocator(descriptor_pool, *descriptor_set_layout), 34 descriptor_set_layout = std::move(tuple.descriptor_set_layout);
164 pipeline_layout{device.GetLogical().CreatePipelineLayout({ 35 pipeline_layout = std::move(tuple.pipeline_layout);
165 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 36 descriptor_update_template = std::move(tuple.descriptor_update_template);
166 .pNext = nullptr, 37 descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
167 .flags = 0, 38
168 .setLayoutCount = 1, 39 pipeline = device.GetLogical().CreateComputePipeline({
169 .pSetLayouts = descriptor_set_layout.address(), 40 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
170 .pushConstantRangeCount = 0, 41 .pNext = nullptr,
171 .pPushConstantRanges = nullptr, 42 .flags = 0,
172 })}, 43 .stage{
173 descriptor_update_template{ 44 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
174 CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, 45 .pNext = nullptr,
175 pipeline{device.GetLogical().CreateComputePipeline({ 46 .flags = 0,
176 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 47 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
177 .pNext = nullptr, 48 .module = *spv_module,
178 .flags = 0, 49 .pName = "main",
179 .stage{ 50 .pSpecializationInfo = nullptr,
180 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 51 },
181 .pNext = nullptr, 52 .layout = *pipeline_layout,
182 .flags = 0, 53 .basePipelineHandle = 0,
183 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 54 .basePipelineIndex = 0,
184 .module = *spv_module, 55 });
185 .pName = "main", 56}
186 .pSpecializationInfo = nullptr,
187 },
188 .layout = *pipeline_layout,
189 .basePipelineHandle = 0,
190 .basePipelineIndex = 0,
191 })} {}
192 57
193void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { 58void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
194 buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); 59 buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
@@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
211 static constexpr size_t max_elements = 64; 76 static constexpr size_t max_elements = 64;
212 std::array<ImageId, max_elements> image_view_ids; 77 std::array<ImageId, max_elements> image_view_ids;
213 boost::container::static_vector<u32, max_elements> image_view_indices; 78 boost::container::static_vector<u32, max_elements> image_view_indices;
214 boost::container::static_vector<VkSampler, max_elements> sampler_handles; 79 boost::container::static_vector<VkSampler, max_elements> samplers;
215 80
216 const auto& launch_desc{kepler_compute.launch_description}; 81 const auto& launch_desc{kepler_compute.launch_description};
217 const auto& cbufs{launch_desc.const_buffer_config}; 82 const auto& cbufs{launch_desc.const_buffer_config};
@@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
228 image_view_indices.push_back(handle.image); 93 image_view_indices.push_back(handle.image);
229 94
230 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); 95 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
231 sampler_handles.push_back(sampler->Handle()); 96 samplers.push_back(sampler->Handle());
232 } 97 }
233
234 const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 98 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
235 texture_cache.FillComputeImageViews(indices_span, image_view_ids); 99 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
236 100
237 size_t index{}; 101 size_t index{};
238 for (const auto& desc : info.texture_descriptors) { 102 PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache,
239 const VkSampler vk_sampler{sampler_handles[index]}; 103 *update_descriptor_queue, index);
240 ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
241 const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
242 update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler);
243 ++index;
244 }
245} 104}
246 105
247VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { 106VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 08d73a2a4..e82e5816b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -9,7 +9,6 @@
9#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
10#include "video_core/renderer_vulkan/vk_buffer_cache.h" 10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
11#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 11#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
12#include "video_core/renderer_vulkan/vk_pipeline.h"
13#include "video_core/renderer_vulkan/vk_texture_cache.h" 12#include "video_core/renderer_vulkan/vk_texture_cache.h"
14#include "video_core/renderer_vulkan/vk_update_descriptor.h" 13#include "video_core/renderer_vulkan/vk_update_descriptor.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -18,7 +17,7 @@ namespace Vulkan {
18 17
19class Device; 18class Device;
20 19
21class ComputePipeline : public Pipeline { 20class ComputePipeline {
22public: 21public:
23 explicit ComputePipeline() = default; 22 explicit ComputePipeline() = default;
24 explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, 23 explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
new file mode 100644
index 000000000..a2ec418b1
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -0,0 +1,445 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <span>
7
8#include <boost/container/small_vector.hpp>
9#include <boost/container/static_vector.hpp>
10
11#include "common/bit_field.h"
12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
13#include "video_core/renderer_vulkan/pipeline_helper.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
16#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_texture_cache.h"
19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/vulkan_common/vulkan_device.h"
21
22namespace Vulkan {
23namespace {
24using boost::container::small_vector;
25using boost::container::static_vector;
26using VideoCore::Surface::PixelFormat;
27using VideoCore::Surface::PixelFormatFromDepthFormat;
28using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
29
30DescriptorLayoutTuple CreateLayout(const Device& device, std::span<const Shader::Info> infos) {
31 DescriptorLayoutBuilder builder;
32 for (size_t index = 0; index < infos.size(); ++index) {
33 static constexpr std::array stages{
34 VK_SHADER_STAGE_VERTEX_BIT,
35 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
36 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
37 VK_SHADER_STAGE_GEOMETRY_BIT,
38 VK_SHADER_STAGE_FRAGMENT_BIT,
39 };
40 builder.Add(infos[index], stages.at(index));
41 }
42 return builder.Create(device.GetLogical());
43}
44
45template <class StencilFace>
46VkStencilOpState GetStencilFaceState(const StencilFace& face) {
47 return {
48 .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
49 .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
50 .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
51 .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
52 .compareMask = 0,
53 .writeMask = 0,
54 .reference = 0,
55 };
56}
57
58bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
59 static constexpr std::array unsupported_topologies{
60 VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
61 VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
62 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
63 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
64 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
65 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
66 // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
67 };
68 return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
69}
70
71VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
72 union Swizzle {
73 u32 raw;
74 BitField<0, 3, Maxwell::ViewportSwizzle> x;
75 BitField<4, 3, Maxwell::ViewportSwizzle> y;
76 BitField<8, 3, Maxwell::ViewportSwizzle> z;
77 BitField<12, 3, Maxwell::ViewportSwizzle> w;
78 };
79 const Swizzle unpacked{swizzle};
80 return VkViewportSwizzleNV{
81 .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
82 .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
83 .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
84 .w = MaxwellToVK::ViewportSwizzle(unpacked.w),
85 };
86}
87
88PixelFormat DecodeFormat(u8 encoded_format) {
89 const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
90 if (format == Tegra::RenderTargetFormat::NONE) {
91 return PixelFormat::Invalid;
92 }
93 return PixelFormatFromRenderTargetFormat(format);
94}
95
96RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
97 RenderPassKey key;
98 std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
99 if (state.depth_enabled != 0) {
100 const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
101 key.depth_format = PixelFormatFromDepthFormat(depth_format);
102 } else {
103 key.depth_format = PixelFormat::Invalid;
104 }
105 key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
106 return key;
107}
108} // Anonymous namespace
109
110GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
111 Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_,
112 BufferCache& buffer_cache_, TextureCache& texture_cache_,
113 const Device& device, VKDescriptorPool& descriptor_pool,
114 VKUpdateDescriptorQueue& update_descriptor_queue_,
115 RenderPassCache& render_pass_cache,
116 const FixedPipelineState& state,
117 std::array<vk::ShaderModule, NUM_STAGES> stages,
118 const std::array<const Shader::Info*, NUM_STAGES>& infos)
119 : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_},
120 buffer_cache{&buffer_cache_}, scheduler{&scheduler_},
121 update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} {
122 std::ranges::transform(infos, stage_infos.begin(),
123 [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
124
125 DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)};
126 descriptor_set_layout = std::move(tuple.descriptor_set_layout);
127 pipeline_layout = std::move(tuple.pipeline_layout);
128 descriptor_update_template = std::move(tuple.descriptor_update_template);
129 descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
130
131 const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))};
132 MakePipeline(device, state, render_pass);
133}
134
135void GraphicsPipeline::Configure(bool is_indexed) {
136 static constexpr size_t max_images_elements = 64;
137 std::array<ImageId, max_images_elements> image_view_ids;
138 static_vector<u32, max_images_elements> image_view_indices;
139 static_vector<VkSampler, max_images_elements> samplers;
140
141 texture_cache->SynchronizeGraphicsDescriptors();
142 texture_cache->UpdateRenderTargets(false);
143
144 const auto& regs{maxwell3d->regs};
145 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
146 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
147 const Shader::Info& info{stage_infos[stage]};
148 buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
149 buffer_cache->UnbindGraphicsStorageBuffers(stage);
150 size_t index{};
151 for (const auto& desc : info.storage_buffers_descriptors) {
152 ASSERT(desc.count == 1);
153 buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset,
154 true);
155 ++index;
156 }
157 const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
158 for (const auto& desc : info.texture_descriptors) {
159 const u32 cbuf_index{desc.cbuf_index};
160 const u32 cbuf_offset{desc.cbuf_offset};
161 ASSERT(cbufs[cbuf_index].enabled);
162 const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset};
163 const u32 raw_handle{gpu_memory->Read<u32>(addr)};
164
165 const TextureHandle handle(raw_handle, via_header_index);
166 image_view_indices.push_back(handle.image);
167
168 Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)};
169 samplers.push_back(sampler->Handle());
170 }
171 }
172 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
173 buffer_cache->UpdateGraphicsBuffers(is_indexed);
174 texture_cache->FillGraphicsImageViews(indices_span, image_view_ids);
175
176 buffer_cache->BindHostGeometryBuffers(is_indexed);
177
178 size_t index{};
179 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
180 buffer_cache->BindHostStageBuffers(stage);
181 PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(),
182 *texture_cache, *update_descriptor_queue, index);
183 }
184 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
185 update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
186
187 scheduler->BindGraphicsPipeline(*pipeline);
188 scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
189 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
190 nullptr);
191 });
192}
193
194void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state,
195 VkRenderPass render_pass) {
196 FixedPipelineState::DynamicState dynamic{};
197 if (!device.IsExtExtendedDynamicStateSupported()) {
198 dynamic = state.dynamic_state;
199 }
200 static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
201 static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
202 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
203 const bool instanced = state.binding_divisors[index] != 0;
204 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
205 vertex_bindings.push_back({
206 .binding = static_cast<u32>(index),
207 .stride = dynamic.vertex_strides[index],
208 .inputRate = rate,
209 });
210 if (instanced) {
211 vertex_binding_divisors.push_back({
212 .binding = static_cast<u32>(index),
213 .divisor = state.binding_divisors[index],
214 });
215 }
216 }
217 static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
218 const auto& input_attributes = stage_infos[0].loads_generics;
219 for (size_t index = 0; index < state.attributes.size(); ++index) {
220 const auto& attribute = state.attributes[index];
221 if (!attribute.enabled || !input_attributes[index]) {
222 continue;
223 }
224 vertex_attributes.push_back({
225 .location = static_cast<u32>(index),
226 .binding = attribute.buffer,
227 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
228 .offset = attribute.offset,
229 });
230 }
231 VkPipelineVertexInputStateCreateInfo vertex_input_ci{
232 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
233 .pNext = nullptr,
234 .flags = 0,
235 .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
236 .pVertexBindingDescriptions = vertex_bindings.data(),
237 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
238 .pVertexAttributeDescriptions = vertex_attributes.data(),
239 };
240 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
241 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
242 .pNext = nullptr,
243 .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
244 .pVertexBindingDivisors = vertex_binding_divisors.data(),
245 };
246 if (!vertex_binding_divisors.empty()) {
247 vertex_input_ci.pNext = &input_divisor_ci;
248 }
249 const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
250 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
251 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
252 .pNext = nullptr,
253 .flags = 0,
254        .topology = input_assembly_topology,
255 .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
256 SupportsPrimitiveRestart(input_assembly_topology),
257 };
258 const VkPipelineTessellationStateCreateInfo tessellation_ci{
259 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
260 .pNext = nullptr,
261 .flags = 0,
262 .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
263 };
264 VkPipelineViewportStateCreateInfo viewport_ci{
265 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
266 .pNext = nullptr,
267 .flags = 0,
268 .viewportCount = Maxwell::NumViewports,
269 .pViewports = nullptr,
270 .scissorCount = Maxwell::NumViewports,
271 .pScissors = nullptr,
272 };
273 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
274 std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
275 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
276 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
277 .pNext = nullptr,
278 .flags = 0,
279 .viewportCount = Maxwell::NumViewports,
280 .pViewportSwizzles = swizzles.data(),
281 };
282 if (device.IsNvViewportSwizzleSupported()) {
283 viewport_ci.pNext = &swizzle_ci;
284 }
285
286 const VkPipelineRasterizationStateCreateInfo rasterization_ci{
287 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
288 .pNext = nullptr,
289 .flags = 0,
290 .depthClampEnable =
291 static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
292 .rasterizerDiscardEnable =
293 static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
294 .polygonMode = VK_POLYGON_MODE_FILL,
295 .cullMode = static_cast<VkCullModeFlags>(
296 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
297 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
298 .depthBiasEnable = state.depth_bias_enable,
299 .depthBiasConstantFactor = 0.0f,
300 .depthBiasClamp = 0.0f,
301 .depthBiasSlopeFactor = 0.0f,
302 .lineWidth = 1.0f,
303 };
304 const VkPipelineMultisampleStateCreateInfo multisample_ci{
305 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
306 .pNext = nullptr,
307 .flags = 0,
308 .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode),
309 .sampleShadingEnable = VK_FALSE,
310 .minSampleShading = 0.0f,
311 .pSampleMask = nullptr,
312 .alphaToCoverageEnable = VK_FALSE,
313 .alphaToOneEnable = VK_FALSE,
314 };
315 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
316 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
317 .pNext = nullptr,
318 .flags = 0,
319 .depthTestEnable = dynamic.depth_test_enable,
320 .depthWriteEnable = dynamic.depth_write_enable,
321 .depthCompareOp = dynamic.depth_test_enable
322 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
323 : VK_COMPARE_OP_ALWAYS,
324 .depthBoundsTestEnable = dynamic.depth_bounds_enable,
325 .stencilTestEnable = dynamic.stencil_enable,
326 .front = GetStencilFaceState(dynamic.front),
327 .back = GetStencilFaceState(dynamic.back),
328 .minDepthBounds = 0.0f,
329 .maxDepthBounds = 0.0f,
330 };
331 static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
332 for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
333 static constexpr std::array mask_table{
334 VK_COLOR_COMPONENT_R_BIT,
335 VK_COLOR_COMPONENT_G_BIT,
336 VK_COLOR_COMPONENT_B_BIT,
337 VK_COLOR_COMPONENT_A_BIT,
338 };
339 const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
340 if (format == Tegra::RenderTargetFormat::NONE) {
341 continue;
342 }
343 const auto& blend{state.attachments[index]};
344 const std::array mask{blend.Mask()};
345 VkColorComponentFlags write_mask{};
346 for (size_t i = 0; i < mask_table.size(); ++i) {
347 write_mask |= mask[i] ? mask_table[i] : 0;
348 }
349 cb_attachments.push_back({
350 .blendEnable = blend.enable != 0,
351 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
352 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
353 .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
354 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
355 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
356 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
357 .colorWriteMask = write_mask,
358 });
359 }
360 const VkPipelineColorBlendStateCreateInfo color_blend_ci{
361 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
362 .pNext = nullptr,
363 .flags = 0,
364 .logicOpEnable = VK_FALSE,
365 .logicOp = VK_LOGIC_OP_COPY,
366 .attachmentCount = static_cast<u32>(cb_attachments.size()),
367 .pAttachments = cb_attachments.data(),
368 .blendConstants = {},
369 };
370 static_vector<VkDynamicState, 17> dynamic_states{
371 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
372 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
373 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
374 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
375 };
376 if (device.IsExtExtendedDynamicStateSupported()) {
377 static constexpr std::array extended{
378 VK_DYNAMIC_STATE_CULL_MODE_EXT,
379 VK_DYNAMIC_STATE_FRONT_FACE_EXT,
380 VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
381 VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
382 VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
383 VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
384 VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
385 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
386 VK_DYNAMIC_STATE_STENCIL_OP_EXT,
387 };
388 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
389 }
390 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
391 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
392 .pNext = nullptr,
393 .flags = 0,
394 .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
395 .pDynamicStates = dynamic_states.data(),
396 };
397 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
398 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
399 .pNext = nullptr,
400 .requiredSubgroupSize = GuestWarpSize,
401 };
402 static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
403 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
404 if (!spv_modules[stage]) {
405 continue;
406 }
407 [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
408 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
409 .pNext = nullptr,
410 .flags = 0,
411 .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)),
412 .module = *spv_modules[stage],
413 .pName = "main",
414 .pSpecializationInfo = nullptr,
415 });
416 /*
417 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
418 stage_ci.pNext = &subgroup_size_ci;
419 }
420 */
421 }
422 pipeline = device.GetLogical().CreateGraphicsPipeline({
423 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
424 .pNext = nullptr,
425 .flags = 0,
426 .stageCount = static_cast<u32>(shader_stages.size()),
427 .pStages = shader_stages.data(),
428 .pVertexInputState = &vertex_input_ci,
429 .pInputAssemblyState = &input_assembly_ci,
430 .pTessellationState = &tessellation_ci,
431 .pViewportState = &viewport_ci,
432 .pRasterizationState = &rasterization_ci,
433 .pMultisampleState = &multisample_ci,
434 .pDepthStencilState = &depth_stencil_ci,
435 .pColorBlendState = &color_blend_ci,
436 .pDynamicState = &dynamic_state_ci,
437 .layout = *pipeline_layout,
438 .renderPass = render_pass,
439 .subpass = 0,
440 .basePipelineHandle = nullptr,
441 .basePipelineIndex = 0,
442 });
443}
444
445} // namespace Vulkan
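The dynamic states registered above are what let the rasterizer mutate state at record
time instead of compiling a new pipeline per permutation. A minimal sketch of the payoff,
assuming the vk::CommandBuffer wrapper exposes the EXT setters used elsewhere in this
series; if it does not, the raw vkCmdSetCullModeEXT/vkCmdSetFrontFaceEXT entry points
apply:

    // With VK_EXT_extended_dynamic_state in pDynamicStates, cull state is a
    // command-buffer property rather than a pipeline permutation.
    void SetCullState(vk::CommandBuffer cmdbuf, bool cull_enabled) {
        cmdbuf.SetCullModeEXT(cull_enabled ? VK_CULL_MODE_BACK_BIT : VK_CULL_MODE_NONE);
        cmdbuf.SetFrontFaceEXT(VK_FRONT_FACE_COUNTER_CLOCKWISE);
    }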
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
new file mode 100644
index 000000000..ba1d34a83
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "shader_recompiler/shader_info.h"
10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
12#include "video_core/renderer_vulkan/vk_buffer_cache.h"
13#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
14#include "video_core/renderer_vulkan/vk_texture_cache.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h"
16
17namespace Vulkan {
18
19class Device;
20class RenderPassCache;
21class VKScheduler;
22class VKUpdateDescriptorQueue;
23
24class GraphicsPipeline {
25 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
26
27public:
28 explicit GraphicsPipeline() = default;
29 explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d,
30                              Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
31                              BufferCache& buffer_cache, TextureCache& texture_cache,
32                              const Device& device, VKDescriptorPool& descriptor_pool,
33 VKUpdateDescriptorQueue& update_descriptor_queue,
34 RenderPassCache& render_pass_cache, const FixedPipelineState& state,
35 std::array<vk::ShaderModule, NUM_STAGES> stages,
36 const std::array<const Shader::Info*, NUM_STAGES>& infos);
37
38 void Configure(bool is_indexed);
39
40 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default;
41 GraphicsPipeline(GraphicsPipeline&&) noexcept = default;
42
43 GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
44 GraphicsPipeline(const GraphicsPipeline&) = delete;
45
46private:
47 void MakePipeline(const Device& device, const FixedPipelineState& state,
48 VkRenderPass render_pass);
49
50 Tegra::Engines::Maxwell3D* maxwell3d{};
51 Tegra::MemoryManager* gpu_memory{};
52 TextureCache* texture_cache{};
53 BufferCache* buffer_cache{};
54 VKScheduler* scheduler{};
55 VKUpdateDescriptorQueue* update_descriptor_queue{};
56
57 std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
58 std::array<Shader::Info, NUM_STAGES> stage_infos;
59 vk::DescriptorSetLayout descriptor_set_layout;
60 DescriptorAllocator descriptor_allocator;
61 vk::PipelineLayout pipeline_layout;
62 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
63 vk::Pipeline pipeline;
64};
65
66} // namespace Vulkan
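A note on the members above: every field is either a non-owning observer pointer or a
movable vk:: handle, so the defaulted move operations are correct and copies stay
deleted; both properties are required because pipelines are stored by value inside an
unordered_map. The invariants, stated as compile-time checks:

    static_assert(std::is_move_constructible_v<Vulkan::GraphicsPipeline>);
    static_assert(std::is_move_assignable_v<Vulkan::GraphicsPipeline>);
    static_assert(!std::is_copy_constructible_v<Vulkan::GraphicsPipeline>);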
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h
deleted file mode 100644
index b06288403..000000000
--- a/src/video_core/renderer_vulkan/vk_pipeline.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include "video_core/vulkan_common/vulkan_wrapper.h"
10
11namespace Vulkan {
12
13class Pipeline {
14public:
15 /// Add a reference count to the pipeline
16 void AddRef() noexcept {
17 ++ref_count;
18 }
19
20 [[nodiscard]] bool RemoveRef() noexcept {
21 --ref_count;
22 return ref_count == 0;
23 }
24
25 [[nodiscard]] u64 UsageTick() const noexcept {
26 return usage_tick;
27 }
28
29protected:
30 u64 usage_tick{};
31
32private:
33 size_t ref_count{};
34};
35
36} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 5477a2903..c9da2080d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -12,8 +12,11 @@
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "core/core.h" 13#include "core/core.h"
14#include "core/memory.h" 14#include "core/memory.h"
15#include "shader_recompiler/backend/spirv/emit_spirv.h"
15#include "shader_recompiler/environment.h" 16#include "shader_recompiler/environment.h"
16#include "shader_recompiler/recompiler.h" 17#include "shader_recompiler/frontend/maxwell/control_flow.h"
18#include "shader_recompiler/frontend/maxwell/program.h"
19#include "shader_recompiler/program_header.h"
17#include "video_core/engines/kepler_compute.h" 20#include "video_core/engines/kepler_compute.h"
18#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
19#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
@@ -34,18 +37,18 @@
34namespace Vulkan { 37namespace Vulkan {
35MICROPROFILE_DECLARE(Vulkan_PipelineCache); 38MICROPROFILE_DECLARE(Vulkan_PipelineCache);
36 39
37using Tegra::Engines::ShaderType;
38
39namespace { 40namespace {
40class Environment final : public Shader::Environment { 41using Shader::Backend::SPIRV::EmitSPIRV;
42
43class GenericEnvironment : public Shader::Environment {
41public: 44public:
42 explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, 45 explicit GenericEnvironment() = default;
43 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) 46 explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
44 : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} 47 : gpu_memory{&gpu_memory_}, program_base{program_base_} {}
45 48
46 ~Environment() override = default; 49 ~GenericEnvironment() override = default;
47 50
48 [[nodiscard]] std::optional<u128> Analyze(u32 start_address) { 51 std::optional<u128> Analyze(u32 start_address) {
49 const std::optional<u64> size{TryFindSize(start_address)}; 52 const std::optional<u64> size{TryFindSize(start_address)};
50 if (!size) { 53 if (!size) {
51 return std::nullopt; 54 return std::nullopt;
@@ -55,52 +58,47 @@ public:
55 return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size()); 58 return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
56 } 59 }
57 60
58 [[nodiscard]] size_t ShaderSize() const noexcept { 61 [[nodiscard]] size_t CachedSize() const noexcept {
62 return cached_highest - cached_lowest + INST_SIZE;
63 }
64
65 [[nodiscard]] size_t ReadSize() const noexcept {
59 return read_highest - read_lowest + INST_SIZE; 66 return read_highest - read_lowest + INST_SIZE;
60 } 67 }
61 68
62 [[nodiscard]] u128 ComputeHash() const { 69 [[nodiscard]] u128 CalculateHash() const {
63 const size_t size{ShaderSize()}; 70 const size_t size{ReadSize()};
64 auto data = std::make_unique<u64[]>(size); 71 auto data = std::make_unique<u64[]>(size);
65 gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); 72 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
66 return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size); 73 return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
67 } 74 }
68 75
69 u64 ReadInstruction(u32 address) override { 76 u64 ReadInstruction(u32 address) final {
70 read_lowest = std::min(read_lowest, address); 77 read_lowest = std::min(read_lowest, address);
71 read_highest = std::max(read_highest, address); 78 read_highest = std::max(read_highest, address);
72 79
73 if (address >= cached_lowest && address < cached_highest) { 80 if (address >= cached_lowest && address < cached_highest) {
74 return code[address / INST_SIZE]; 81 return code[address / INST_SIZE];
75 } 82 }
76 return gpu_memory.Read<u64>(program_base + address); 83 return gpu_memory->Read<u64>(program_base + address);
77 }
78
79 u32 TextureBoundBuffer() override {
80 return kepler_compute.regs.tex_cb_index;
81 }
82
83 std::array<u32, 3> WorkgroupSize() override {
84 const auto& qmd{kepler_compute.launch_description};
85 return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
86 } 84 }
87 85
88private: 86protected:
89 static constexpr size_t INST_SIZE = sizeof(u64); 87 static constexpr size_t INST_SIZE = sizeof(u64);
90 static constexpr size_t BLOCK_SIZE = 0x1000;
91 static constexpr size_t MAXIMUM_SIZE = 0x100000;
92 88
93 static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; 89 std::optional<u64> TryFindSize(GPUVAddr guest_addr) {
94 static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; 90 constexpr size_t BLOCK_SIZE = 0x1000;
91 constexpr size_t MAXIMUM_SIZE = 0x100000;
92
93 constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
94 constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
95 95
96 std::optional<u64> TryFindSize(u32 start_address) {
97 GPUVAddr guest_addr = program_base + start_address;
98 size_t offset = 0; 96 size_t offset = 0;
99 size_t size = BLOCK_SIZE; 97 size_t size = BLOCK_SIZE;
100 while (size <= MAXIMUM_SIZE) { 98 while (size <= MAXIMUM_SIZE) {
101 code.resize(size / INST_SIZE); 99 code.resize(size / INST_SIZE);
102 u64* const data = code.data() + offset / INST_SIZE; 100 u64* const data = code.data() + offset / INST_SIZE;
103 gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); 101 gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
104 for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { 102 for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
105 const u64 inst = data[i / INST_SIZE]; 103 const u64 inst = data[i / INST_SIZE];
106 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { 104 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
@@ -114,17 +112,87 @@ private:
114 return std::nullopt; 112 return std::nullopt;
115 } 113 }
116 114
117 Tegra::Engines::KeplerCompute& kepler_compute; 115 Tegra::MemoryManager* gpu_memory{};
118 Tegra::MemoryManager& gpu_memory; 116 GPUVAddr program_base{};
119 GPUVAddr program_base; 117
118 std::vector<u64> code;
120 119
121 u32 read_lowest = 0; 120 u32 read_lowest = std::numeric_limits<u32>::max();
122 u32 read_highest = 0; 121 u32 read_highest = 0;
123 122
124 std::vector<u64> code;
125 u32 cached_lowest = std::numeric_limits<u32>::max(); 123 u32 cached_lowest = std::numeric_limits<u32>::max();
126 u32 cached_highest = 0; 124 u32 cached_highest = 0;
127}; 125};
126
127class GraphicsEnvironment final : public GenericEnvironment {
128public:
129 explicit GraphicsEnvironment() = default;
130 explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
131 Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
132 GPUVAddr program_base_, u32 start_offset)
133 : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} {
134 gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph));
135 switch (program) {
136 case Maxwell::ShaderProgram::VertexA:
137 stage = Shader::Stage::VertexA;
138 break;
139 case Maxwell::ShaderProgram::VertexB:
140 stage = Shader::Stage::VertexB;
141 break;
142 case Maxwell::ShaderProgram::TesselationControl:
143 stage = Shader::Stage::TessellationControl;
144 break;
145 case Maxwell::ShaderProgram::TesselationEval:
146 stage = Shader::Stage::TessellationEval;
147 break;
148 case Maxwell::ShaderProgram::Geometry:
149 stage = Shader::Stage::Geometry;
150 break;
151 case Maxwell::ShaderProgram::Fragment:
152 stage = Shader::Stage::Fragment;
153 break;
154 default:
155 UNREACHABLE_MSG("Invalid program={}", program);
156 }
157 }
158
159 ~GraphicsEnvironment() override = default;
160
161 u32 TextureBoundBuffer() override {
162 return maxwell3d->regs.tex_cb_index;
163 }
164
165 std::array<u32, 3> WorkgroupSize() override {
166 throw Shader::LogicError("Requesting workgroup size in a graphics stage");
167 }
168
169private:
170 Tegra::Engines::Maxwell3D* maxwell3d{};
171};
172
173class ComputeEnvironment final : public GenericEnvironment {
174public:
175 explicit ComputeEnvironment() = default;
176 explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
177 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
178 : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} {
179 stage = Shader::Stage::Compute;
180 }
181
182 ~ComputeEnvironment() override = default;
183
184 u32 TextureBoundBuffer() override {
185 return kepler_compute->regs.tex_cb_index;
186 }
187
188 std::array<u32, 3> WorkgroupSize() override {
189 const auto& qmd{kepler_compute->launch_description};
190 return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
191 }
192
193private:
194 Tegra::Engines::KeplerCompute* kepler_compute{};
195};
128} // Anonymous namespace 196} // Anonymous namespace
129 197
130size_t ComputePipelineCacheKey::Hash() const noexcept { 198size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
136 return std::memcmp(&rhs, this, sizeof *this) == 0; 204 return std::memcmp(&rhs, this, sizeof *this) == 0;
137} 205}
138 206
207size_t GraphicsPipelineCacheKey::Hash() const noexcept {
208 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
209 return static_cast<size_t>(hash);
210}
211
212bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
213 return std::memcmp(&rhs, this, Size()) == 0;
214}
215
139PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, 216PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
140 Tegra::Engines::Maxwell3D& maxwell3d_, 217 Tegra::Engines::Maxwell3D& maxwell3d_,
141 Tegra::Engines::KeplerCompute& kepler_compute_, 218 Tegra::Engines::KeplerCompute& kepler_compute_,
142 Tegra::MemoryManager& gpu_memory_, const Device& device_, 219 Tegra::MemoryManager& gpu_memory_, const Device& device_,
143 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 220 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
144 VKUpdateDescriptorQueue& update_descriptor_queue_) 221 VKUpdateDescriptorQueue& update_descriptor_queue_,
222 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
223 TextureCache& texture_cache_)
145 : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, 224 : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
146 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, 225 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
147 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ 226 scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
148 update_descriptor_queue_} {} 227 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
228 buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {
229 const auto& float_control{device.FloatControlProperties()};
230 profile = Shader::Profile{
231 .unified_descriptor_binding = true,
232 .support_float_controls = true,
233 .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
234 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
235 .support_separate_rounding_mode =
236 float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
237 .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
238 .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
239 .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
240 .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
241 .support_fp16_signed_zero_nan_preserve =
242 float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
243 .support_fp32_signed_zero_nan_preserve =
244 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
245 .has_broken_spirv_clamp = true, // TODO: is_intel
246 };
247}
149 248
150PipelineCache::~PipelineCache() = default; 249PipelineCache::~PipelineCache() = default;
151 250
251GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
252 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
253
254 if (!RefreshStages()) {
255 return nullptr;
256 }
257 graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
258
259 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
260 auto& pipeline{pair->second};
261 if (!is_new) {
262 return &pipeline;
263 }
264 pipeline = CreateGraphicsPipeline();
265 return &pipeline;
266}
267
152ComputePipeline* PipelineCache::CurrentComputePipeline() { 268ComputePipeline* PipelineCache::CurrentComputePipeline() {
153 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 269 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
154 270
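CurrentGraphicsPipeline above builds on unordered_map::try_emplace: the mapped value is
default-constructed exactly once per key and only filled in when is_new is set. A
self-contained sketch of the idiom (Pipeline and the size_t key are invented stand-ins
for GraphicsPipeline and GraphicsPipelineCacheKey):

    #include <unordered_map>

    struct Pipeline {
        bool valid{};
    };

    Pipeline* GetOrCreate(std::unordered_map<size_t, Pipeline>& cache, size_t key) {
        const auto [it, is_new] = cache.try_emplace(key); // default-constructs on miss
        if (is_new) {
            it->second = Pipeline{.valid = true}; // stands in for CreateGraphicsPipeline()
        }
        return &it->second; // node-based map: the address stays stable across rehashes
    }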
@@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
170 return &pipeline; 286 return &pipeline;
171 } 287 }
172 pipeline = CreateComputePipeline(shader); 288 pipeline = CreateComputePipeline(shader);
173 shader->compute_users.push_back(key);
174 return &pipeline; 289 return &pipeline;
175} 290}
176 291
292bool PipelineCache::RefreshStages() {
293 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
294 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
295 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
296 graphics_key.unique_hashes[index] = u128{};
297 continue;
298 }
299 const auto& shader_config{maxwell3d.regs.shader_config[index]};
300 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
301 const GPUVAddr shader_addr{base_addr + shader_config.offset};
302 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
303 if (!cpu_shader_addr) {
304 LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
305 return false;
306 }
307 const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
308 if (!shader_info) {
309 const u32 offset{shader_config.offset};
310 shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr);
311 }
312 graphics_key.unique_hashes[index] = shader_info->unique_hash;
313 }
314 return true;
315}
316
317const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
318 u32 start_address, VAddr cpu_addr) {
319 GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
320 auto info = std::make_unique<ShaderInfo>();
321 if (const std::optional<u128> cached_hash{env.Analyze(start_address)}) {
322 info->unique_hash = *cached_hash;
323 info->size_bytes = env.CachedSize();
324 } else {
325 // Slow path, not really hit by commercial games
326 // Build a control flow graph to get the real shader size
327 flow_block_pool.ReleaseContents();
328 Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
329 info->unique_hash = env.CalculateHash();
330 info->size_bytes = env.ReadSize();
331 }
332 const size_t size_bytes{info->size_bytes};
333 const ShaderInfo* const result{info.get()};
334 Register(std::move(info), cpu_addr, size_bytes);
335 return result;
336}
337
338GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
339 flow_block_pool.ReleaseContents();
340 inst_pool.ReleaseContents();
341 block_pool.ReleaseContents();
342
343 std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs;
344 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
345
346 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
347 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
348 if (graphics_key.unique_hashes[index] == u128{}) {
349 continue;
350 }
351 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
352 GraphicsEnvironment& env{envs[index]};
353 const u32 start_address{maxwell3d.regs.shader_config[index].offset};
354 env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
355
356 const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader);
357 Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset);
358 programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg);
359 }
360 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
361 std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
362
363 u32 binding{0};
364 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
365 if (graphics_key.unique_hashes[index] == u128{}) {
366 continue;
367 }
368 UNIMPLEMENTED_IF(index == 0);
369
370 GraphicsEnvironment& env{envs[index]};
371 Shader::IR::Program& program{programs[index]};
372
373 const size_t stage_index{index - 1};
374 infos[stage_index] = &program.info;
375 std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
376
377 // FILE* file = fopen("D:\\shader.spv", "wb");
378 // fwrite(code.data(), 4, code.size(), file);
379 // fclose(file);
380 // std::system("spirv-cross --vulkan-semantics D:\\shader.spv");
381
382 modules[stage_index] = BuildShader(device, code);
383 }
384 return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device,
385 descriptor_pool, update_descriptor_queue, render_pass_cache,
386 graphics_key.state, std::move(modules), infos);
387}
388
177ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { 389ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
178 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; 390 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
179 const auto& qmd{kepler_compute.launch_description}; 391 const auto& qmd{kepler_compute.launch_description};
180 Environment env{kepler_compute, gpu_memory, program_base}; 392 ComputeEnvironment env{kepler_compute, gpu_memory, program_base};
181 if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { 393 if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
182 // TODO: Load from cache 394 // TODO: Load from cache
183 } 395 }
184 const auto& float_control{device.FloatControlProperties()}; 396 flow_block_pool.ReleaseContents();
185 const Shader::Profile profile{ 397 inst_pool.ReleaseContents();
186 .unified_descriptor_binding = true, 398 block_pool.ReleaseContents();
187 .support_float_controls = true, 399
188 .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == 400 Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start};
189 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, 401 Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
190 .support_separate_rounding_mode = 402 u32 binding{0};
191 float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, 403 std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
192 .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
193 .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
194 .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
195 .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
196 .support_fp16_signed_zero_nan_preserve =
197 float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
198 .support_fp32_signed_zero_nan_preserve =
199 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
200 .has_broken_spirv_clamp = true, // TODO: is_intel
201 };
202 const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)};
203 /* 404 /*
204 FILE* file = fopen("D:\\shader.spv", "wb"); 405 FILE* file = fopen("D:\\shader.spv", "wb");
205 fwrite(code.data(), 4, code.size(), file); 406 fwrite(code.data(), 4, code.size(), file);
206 fclose(file); 407 fclose(file);
207 std::system("spirv-dis D:\\shader.spv"); 408 std::system("spirv-dis D:\\shader.spv");
208 */ 409 */
209 shader_info->unique_hash = env.ComputeHash(); 410 shader_info->unique_hash = env.CalculateHash();
210 shader_info->size_bytes = env.ShaderSize(); 411 shader_info->size_bytes = env.ReadSize();
211 return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, 412 return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info,
212 BuildShader(device, code)}; 413 BuildShader(device, code)};
213} 414}
214 415
@@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_
216 ShaderInfo shader; 417 ShaderInfo shader;
217 ComputePipeline pipeline{CreateComputePipeline(&shader)}; 418 ComputePipeline pipeline{CreateComputePipeline(&shader)};
218 const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; 419 const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
219 shader.compute_users.push_back(key);
220 pipeline.AddRef();
221
222 const size_t size_bytes{shader.size_bytes}; 420 const size_t size_bytes{shader.size_bytes};
223 Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes); 421 Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
224 return &compute_cache.emplace(key, std::move(pipeline)).first->second; 422 return &compute_cache.emplace(key, std::move(pipeline)).first->second;
@@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash)
233 }; 431 };
234} 432}
235 433
236void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
237 for (const ComputePipelineCacheKey& key : shader->compute_users) {
238 const auto it = compute_cache.find(key);
239 ASSERT(it != compute_cache.end());
240
241 Pipeline& pipeline = it->second;
242 if (pipeline.RemoveRef()) {
243 // Wait for the pipeline to be free of GPU usage before destroying it
244 scheduler.Wait(pipeline.UsageTick());
245 compute_cache.erase(it);
246 }
247 }
248}
249
250} // namespace Vulkan 434} // namespace Vulkan
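The disabled SPIR-V dumps above write to a hardcoded path and shell out to external
tools, which is only tolerable as local debugging. An opt-in guard is the safer pattern;
a sketch, where the YUZU_DUMP_SPIRV variable is hypothetical and u32 comes from
common/common_types.h:

    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    void DumpSpirv(const std::vector<u32>& code) {
        const char* const path = std::getenv("YUZU_DUMP_SPIRV"); // hypothetical opt-in
        if (path == nullptr) {
            return;
        }
        if (FILE* const file = std::fopen(path, "wb")) {
            std::fwrite(code.data(), sizeof(u32), code.size(), file);
            std::fclose(file);
        }
    }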
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eb35abc27..60fb976df 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -12,11 +12,18 @@
12#include <utility> 12#include <utility>
13#include <vector> 13#include <vector>
14 14
15#include <boost/functional/hash.hpp>
16
17#include "common/common_types.h" 15#include "common/common_types.h"
16#include "shader_recompiler/frontend/ir/basic_block.h"
17#include "shader_recompiler/frontend/ir/microinstruction.h"
18#include "shader_recompiler/frontend/maxwell/control_flow.h"
19#include "shader_recompiler/object_pool.h"
20#include "shader_recompiler/profile.h"
18#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 22#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
23#include "video_core/renderer_vulkan/vk_buffer_cache.h"
24#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
25#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
26#include "video_core/renderer_vulkan/vk_texture_cache.h"
20#include "video_core/shader_cache.h" 27#include "video_core/shader_cache.h"
21#include "video_core/vulkan_common/vulkan_wrapper.h" 28#include "video_core/vulkan_common/vulkan_wrapper.h"
22 29
@@ -26,13 +33,6 @@ class System;
26 33
27namespace Vulkan { 34namespace Vulkan {
28 35
29class Device;
30class RasterizerVulkan;
31class ComputePipeline;
32class VKDescriptorPool;
33class VKScheduler;
34class VKUpdateDescriptorQueue;
35
36using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36using Maxwell = Tegra::Engines::Maxwell3D::Regs;
37 37
38struct ComputePipelineCacheKey { 38struct ComputePipelineCacheKey {
@@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>)
52static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>); 52static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
53static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); 53static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
54 54
55struct GraphicsPipelineCacheKey {
56 std::array<u128, 6> unique_hashes;
57 FixedPipelineState state;
58
59 size_t Hash() const noexcept;
60
61 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
62
63 bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
64 return !operator==(rhs);
65 }
66
67 size_t Size() const noexcept {
68 return sizeof(unique_hashes) + state.Size();
69 }
70};
71static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
72static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
73static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
74
55} // namespace Vulkan 75} // namespace Vulkan
56 76
57namespace std { 77namespace std {
@@ -63,14 +83,28 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
63 } 83 }
64}; 84};
65 85
86template <>
87struct hash<Vulkan::GraphicsPipelineCacheKey> {
88 size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
89 return k.Hash();
90 }
91};
92
66} // namespace std 93} // namespace std
67 94
68namespace Vulkan { 95namespace Vulkan {
69 96
97class ComputePipeline;
98class Device;
99class RasterizerVulkan;
100class RenderPassCache;
101class VKDescriptorPool;
102class VKScheduler;
103class VKUpdateDescriptorQueue;
104
70struct ShaderInfo { 105struct ShaderInfo {
71 u128 unique_hash{}; 106 u128 unique_hash{};
72 size_t size_bytes{}; 107 size_t size_bytes{};
73 std::vector<ComputePipelineCacheKey> compute_users;
74}; 108};
75 109
76class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { 110class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
@@ -80,15 +114,23 @@ public:
80 Tegra::Engines::KeplerCompute& kepler_compute, 114 Tegra::Engines::KeplerCompute& kepler_compute,
81 Tegra::MemoryManager& gpu_memory, const Device& device, 115 Tegra::MemoryManager& gpu_memory, const Device& device,
82 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, 116 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
83 VKUpdateDescriptorQueue& update_descriptor_queue); 117 VKUpdateDescriptorQueue& update_descriptor_queue,
118 RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
119 TextureCache& texture_cache);
84 ~PipelineCache() override; 120 ~PipelineCache() override;
85 121
86 [[nodiscard]] ComputePipeline* CurrentComputePipeline(); 122 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
87 123
88protected: 124 [[nodiscard]] ComputePipeline* CurrentComputePipeline();
89 void OnShaderRemoval(ShaderInfo* shader) override;
90 125
91private: 126private:
127 bool RefreshStages();
128
129 const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
130 u32 start_address, VAddr cpu_addr);
131
132 GraphicsPipeline CreateGraphicsPipeline();
133
92 ComputePipeline CreateComputePipeline(ShaderInfo* shader); 134 ComputePipeline CreateComputePipeline(ShaderInfo* shader);
93 135
94 ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); 136 ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
@@ -104,8 +146,20 @@ private:
104 VKScheduler& scheduler; 146 VKScheduler& scheduler;
105 VKDescriptorPool& descriptor_pool; 147 VKDescriptorPool& descriptor_pool;
106 VKUpdateDescriptorQueue& update_descriptor_queue; 148 VKUpdateDescriptorQueue& update_descriptor_queue;
149 RenderPassCache& render_pass_cache;
150 BufferCache& buffer_cache;
151 TextureCache& texture_cache;
152
153 GraphicsPipelineCacheKey graphics_key{};
107 154
108 std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache; 155 std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
156 std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache;
157
158 Shader::ObjectPool<Shader::IR::Inst> inst_pool;
159 Shader::ObjectPool<Shader::IR::Block> block_pool;
160 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool;
161
162 Shader::Profile profile;
109}; 163};
110 164
111} // namespace Vulkan 165} // namespace Vulkan
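The static_asserts above are load-bearing: hashing with CityHash64 and comparing with
memcmp over a key's raw bytes is only sound for types with no padding, which
has_unique_object_representations verifies. A minimal illustration with an invented
PackedKey:

    #include <cstring>
    #include <type_traits>

    struct PackedKey {
        u64 a;
        u32 b;
        u32 c; // no padding bytes, so every byte participates in equality
    };
    static_assert(std::has_unique_object_representations_v<PackedKey>);

    bool Equal(const PackedKey& lhs, const PackedKey& rhs) {
        return std::memcmp(&lhs, &rhs, sizeof(PackedKey)) == 0;
    }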
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c94419d29..036b531b9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
141 blit_image(device, scheduler, state_tracker, descriptor_pool), 141 blit_image(device, scheduler, state_tracker, descriptor_pool),
142 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, 142 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
143 memory_allocator), 143 memory_allocator),
144 texture_cache_runtime{device, scheduler, memory_allocator, 144 render_pass_cache(device), texture_cache_runtime{device, scheduler,
145 staging_pool, blit_image, astc_decoder_pass}, 145 memory_allocator, staging_pool,
146 blit_image, astc_decoder_pass,
147 render_pass_cache},
146 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 148 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
147 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 149 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
148 update_descriptor_queue, descriptor_pool), 150 update_descriptor_queue, descriptor_pool),
149 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 151 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
150 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 152 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
151 descriptor_pool, update_descriptor_queue), 153 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
152 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, 154 texture_cache),
155 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
153 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 156 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
154 wfi_event(device.GetLogical().CreateEvent()) { 157 wfi_event(device.GetLogical().CreateEvent()) {
155 scheduler.SetQueryCache(query_cache); 158 scheduler.SetQueryCache(query_cache);
@@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
158RasterizerVulkan::~RasterizerVulkan() = default; 161RasterizerVulkan::~RasterizerVulkan() = default;
159 162
160void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { 163void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
161 UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); 164 MICROPROFILE_SCOPE(Vulkan_Drawing);
165
166 SCOPE_EXIT({ gpu.TickWork(); });
167 FlushWork();
168
169 query_cache.UpdateCounters();
170
171 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
172 if (!pipeline) {
173 return;
174 }
175 update_descriptor_queue.Acquire();
176 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
177 pipeline->Configure(is_indexed);
178
179 BeginTransformFeedback();
180
181 scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
182 UpdateDynamicStates();
183
184 const auto& regs{maxwell3d.regs};
185 const u32 num_instances{maxwell3d.mme_draw.instance_count};
186 const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
187 scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
188 if (draw_params.is_indexed) {
189 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
190 draw_params.base_vertex, draw_params.base_instance);
191 } else {
192 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
193 draw_params.base_vertex, draw_params.base_instance);
194 }
195 });
196 EndTransformFeedback();
162} 197}
163 198
164void RasterizerVulkan::Clear() { 199void RasterizerVulkan::Clear() {
@@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() {
487 if ((++draw_counter & 7) != 7) { 522 if ((++draw_counter & 7) != 7) {
488 return; 523 return;
489 } 524 }
490
491 if (draw_counter < DRAWS_TO_DISPATCH) { 525 if (draw_counter < DRAWS_TO_DISPATCH) {
492 // Send recorded tasks to the worker thread 526 // Send recorded tasks to the worker thread
493 scheduler.DispatchWork(); 527 scheduler.DispatchWork();
494 return; 528 return;
495 } 529 }
496
497 // Otherwise, after enough draws have accumulated, flush execution. 530 // Otherwise, after enough draws have accumulated, flush execution.
498 // This submits commands to the Vulkan driver. 531 // This submits commands to the Vulkan driver.
499 scheduler.Flush(); 532 scheduler.Flush();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3fd03b915..88dbd753b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -23,6 +23,7 @@
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 23#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
25#include "video_core/renderer_vulkan/vk_query_cache.h" 25#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
28#include "video_core/renderer_vulkan/vk_texture_cache.h" 29#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -148,6 +149,7 @@ private:
148 VKUpdateDescriptorQueue update_descriptor_queue; 149 VKUpdateDescriptorQueue update_descriptor_queue;
149 BlitImageHelper blit_image; 150 BlitImageHelper blit_image;
150 ASTCDecoderPass astc_decoder_pass; 151 ASTCDecoderPass astc_decoder_pass;
152 RenderPassCache render_pass_cache;
151 153
152 TextureCacheRuntime texture_cache_runtime; 154 TextureCacheRuntime texture_cache_runtime;
153 TextureCache texture_cache; 155 TextureCache texture_cache;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 000000000..7e5ae43ea
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,100 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5
6
7#include <unordered_map>
8
9#include <boost/container/static_vector.hpp>
10
11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
12#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
13#include "video_core/surface.h"
14#include "video_core/vulkan_common/vulkan_device.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h"
16
17namespace Vulkan {
18namespace {
19using VideoCore::Surface::PixelFormat;
20
21constexpr std::array ATTACHMENT_REFERENCES{
22 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
23 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
24 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
25 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
26 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
27 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
28 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
29 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
30 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
31};
32
33VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
34 VkSampleCountFlagBits samples) {
35 using MaxwellToVK::SurfaceFormat;
36 return {
37 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
38 .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
39 .samples = samples,
40 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
41 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
42 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
43 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
44 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
45 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
46 };
47}
48} // Anonymous namespace
49
50RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
51
52VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
53 const auto [pair, is_new] = cache.try_emplace(key);
54 if (!is_new) {
55 return *pair->second;
56 }
57 boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
58 u32 num_images{0};
59
60 for (size_t index = 0; index < key.color_formats.size(); ++index) {
61 const PixelFormat format{key.color_formats[index]};
62 if (format == PixelFormat::Invalid) {
63 continue;
64 }
65 descriptions.push_back(AttachmentDescription(*device, format, key.samples));
66 ++num_images;
67 }
68 const size_t num_colors{descriptions.size()};
69 const VkAttachmentReference* depth_attachment{};
70 if (key.depth_format != PixelFormat::Invalid) {
71 depth_attachment = &ATTACHMENT_REFERENCES[num_colors];
72 descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
73 }
74 const VkSubpassDescription subpass{
75 .flags = 0,
76 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
77 .inputAttachmentCount = 0,
78 .pInputAttachments = nullptr,
79 .colorAttachmentCount = static_cast<u32>(num_colors),
80 .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
81 .pResolveAttachments = nullptr,
82 .pDepthStencilAttachment = depth_attachment,
83 .preserveAttachmentCount = 0,
84 .pPreserveAttachments = nullptr,
85 };
86 pair->second = device->GetLogical().CreateRenderPass({
87 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
88 .pNext = nullptr,
89 .flags = 0,
90 .attachmentCount = static_cast<u32>(descriptions.size()),
91 .pAttachments = descriptions.data(),
92 .subpassCount = 1,
93 .pSubpasses = &subpass,
94 .dependencyCount = 0,
95 .pDependencies = nullptr,
96 });
97 return *pair->second;
98}
99
100} // namespace Vulkan
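Usage sketch for the cache above; the pixel format values are illustrative. Identical
keys return the same VkRenderPass handle, which is how framebuffers and pipelines end up
agreeing on compatible render passes:

    RenderPassKey key{};
    key.color_formats.fill(VideoCore::Surface::PixelFormat::Invalid);
    key.color_formats[0] = VideoCore::Surface::PixelFormat::B8G8R8A8_UNORM;
    key.depth_format = VideoCore::Surface::PixelFormat::D32_FLOAT;
    key.samples = VK_SAMPLE_COUNT_1_BIT;
    const VkRenderPass pass = render_pass_cache.Get(key); // memoized after the first call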
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..db8e83f1a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "video_core/surface.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h"
11
12namespace Vulkan {
13
14struct RenderPassKey {
15 auto operator<=>(const RenderPassKey&) const noexcept = default;
16
17 std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
18 VideoCore::Surface::PixelFormat depth_format;
19 VkSampleCountFlagBits samples;
20};
21
22} // namespace Vulkan
23
24namespace std {
25template <>
26struct hash<Vulkan::RenderPassKey> {
27 [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
28 size_t value = static_cast<size_t>(key.depth_format) << 48;
29 value ^= static_cast<size_t>(key.samples) << 52;
30 for (size_t i = 0; i < key.color_formats.size(); ++i) {
31 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
32 }
33 return value;
34 }
35};
36} // namespace std
37
38namespace Vulkan {
39
40class Device;
41
42class RenderPassCache {
43public:
44 explicit RenderPassCache(const Device& device_);
45
46 VkRenderPass Get(const RenderPassKey& key);
47
48private:
49 const Device* device{};
50 std::unordered_map<RenderPassKey, vk::RenderPass> cache;
51};
52
53} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 88ccf96f5..1bbc542a1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -18,6 +18,7 @@
18#include "video_core/renderer_vulkan/vk_scheduler.h" 18#include "video_core/renderer_vulkan/vk_scheduler.h"
19#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 19#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
20#include "video_core/renderer_vulkan/vk_texture_cache.h" 20#include "video_core/renderer_vulkan/vk_texture_cache.h"
21#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
21#include "video_core/vulkan_common/vulkan_device.h" 22#include "video_core/vulkan_common/vulkan_device.h"
22#include "video_core/vulkan_common/vulkan_memory_allocator.h" 23#include "video_core/vulkan_common/vulkan_memory_allocator.h"
23#include "video_core/vulkan_common/vulkan_wrapper.h" 24#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
34using VideoCore::Surface::IsPixelFormatASTC; 35using VideoCore::Surface::IsPixelFormatASTC;
35 36
36namespace { 37namespace {
37
38constexpr std::array ATTACHMENT_REFERENCES{
39 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
42 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
43 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
44 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
45 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
46 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
47 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
48};
49
50constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { 38constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
51 if (color == std::array<float, 4>{0, 0, 0, 0}) { 39 if (color == std::array<float, 4>{0, 0, 0, 0}) {
52 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; 40 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
226 } 214 }
227} 215}
228 216
229[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
230 const ImageView* image_view) {
231 using MaxwellToVK::SurfaceFormat;
232 const PixelFormat pixel_format = image_view->format;
233 return VkAttachmentDescription{
234 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
235 .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
236 .samples = image_view->Samples(),
237 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
238 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
239 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
240 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
241 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
242 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
243 };
244}
245
246[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { 217[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
247 switch (swizzle) { 218 switch (swizzle) {
248 case SwizzleSource::Zero: 219 case SwizzleSource::Zero:
@@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
1164 1135
1165Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, 1136Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1166 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { 1137 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1167 std::vector<VkAttachmentDescription> descriptions;
1168 std::vector<VkImageView> attachments; 1138 std::vector<VkImageView> attachments;
1169 RenderPassKey renderpass_key{}; 1139 RenderPassKey renderpass_key{};
1170 s32 num_layers = 1; 1140 s32 num_layers = 1;
@@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1175 renderpass_key.color_formats[index] = PixelFormat::Invalid; 1145 renderpass_key.color_formats[index] = PixelFormat::Invalid;
1176 continue; 1146 continue;
1177 } 1147 }
1178 descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
1179 attachments.push_back(color_buffer->RenderTarget()); 1148 attachments.push_back(color_buffer->RenderTarget());
1180 renderpass_key.color_formats[index] = color_buffer->format; 1149 renderpass_key.color_formats[index] = color_buffer->format;
1181 num_layers = std::max(num_layers, color_buffer->range.extent.layers); 1150 num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1185 ++num_images; 1154 ++num_images;
1186 } 1155 }
1187 const size_t num_colors = attachments.size(); 1156 const size_t num_colors = attachments.size();
1188 const VkAttachmentReference* depth_attachment =
1189 depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
1190 if (depth_buffer) { 1157 if (depth_buffer) {
1191 descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
1192 attachments.push_back(depth_buffer->RenderTarget()); 1158 attachments.push_back(depth_buffer->RenderTarget());
1193 renderpass_key.depth_format = depth_buffer->format; 1159 renderpass_key.depth_format = depth_buffer->format;
1194 num_layers = std::max(num_layers, depth_buffer->range.extent.layers); 1160 num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1201 } 1167 }
1202 renderpass_key.samples = samples; 1168 renderpass_key.samples = samples;
1203 1169
1204 const auto& device = runtime.device.GetLogical(); 1170 renderpass = runtime.render_pass_cache.Get(renderpass_key);
1205 const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); 1171
1206 if (is_new) {
1207 const VkSubpassDescription subpass{
1208 .flags = 0,
1209 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1210 .inputAttachmentCount = 0,
1211 .pInputAttachments = nullptr,
1212 .colorAttachmentCount = static_cast<u32>(num_colors),
1213 .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
1214 .pResolveAttachments = nullptr,
1215 .pDepthStencilAttachment = depth_attachment,
1216 .preserveAttachmentCount = 0,
1217 .pPreserveAttachments = nullptr,
1218 };
1219 cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
1220 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1221 .pNext = nullptr,
1222 .flags = 0,
1223 .attachmentCount = static_cast<u32>(descriptions.size()),
1224 .pAttachments = descriptions.data(),
1225 .subpassCount = 1,
1226 .pSubpasses = &subpass,
1227 .dependencyCount = 0,
1228 .pDependencies = nullptr,
1229 });
1230 }
1231 renderpass = *cache_pair->second;
1232 render_area = VkExtent2D{ 1172 render_area = VkExtent2D{
1233 .width = key.size.width, 1173 .width = key.size.width,
1234 .height = key.size.height, 1174 .height = key.size.height,
1235 }; 1175 };
1236 num_color_buffers = static_cast<u32>(num_colors); 1176 num_color_buffers = static_cast<u32>(num_colors);
1237 framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ 1177 framebuffer = runtime.device.GetLogical().CreateFramebuffer({
1238 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, 1178 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1239 .pNext = nullptr, 1179 .pNext = nullptr,
1240 .flags = 0, 1180 .flags = 0,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98..189ee5a68 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -26,35 +26,10 @@ class Device;
26class Image; 26class Image;
27class ImageView; 27class ImageView;
28class Framebuffer; 28class Framebuffer;
29class RenderPassCache;
29class StagingBufferPool; 30class StagingBufferPool;
30class VKScheduler; 31class VKScheduler;
31 32
32struct RenderPassKey {
33 constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
34
35 std::array<PixelFormat, NUM_RT> color_formats;
36 PixelFormat depth_format;
37 VkSampleCountFlagBits samples;
38};
39
40} // namespace Vulkan
41
42namespace std {
43template <>
44struct hash<Vulkan::RenderPassKey> {
45 [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
46 size_t value = static_cast<size_t>(key.depth_format) << 48;
47 value ^= static_cast<size_t>(key.samples) << 52;
48 for (size_t i = 0; i < key.color_formats.size(); ++i) {
49 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
50 }
51 return value;
52 }
53};
54} // namespace std
55
56namespace Vulkan {
57
58struct TextureCacheRuntime { 33struct TextureCacheRuntime {
59 const Device& device; 34 const Device& device;
60 VKScheduler& scheduler; 35 VKScheduler& scheduler;
@@ -62,7 +37,7 @@ struct TextureCacheRuntime {
62 StagingBufferPool& staging_buffer_pool; 37 StagingBufferPool& staging_buffer_pool;
63 BlitImageHelper& blit_image_helper; 38 BlitImageHelper& blit_image_helper;
64 ASTCDecoderPass& astc_decoder_pass; 39 ASTCDecoderPass& astc_decoder_pass;
65 std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; 40 RenderPassCache& render_pass_cache;
66 41
67 void Finish(); 42 void Finish();
68 43
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 4887d6fd9..f0e5b098c 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
49 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 49 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
50 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, 50 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
51 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, 51 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
52 VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
52#ifdef _WIN32 53#ifdef _WIN32
53 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, 54 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
54#endif 55#endif
@@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
312 }; 313 };
313 SetNext(next, host_query_reset); 314 SetNext(next, host_query_reset);
314 315
316 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
317 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
318 .pNext = nullptr,
319 .shaderDemoteToHelperInvocation = true,
320 };
321 SetNext(next, demote);
322
315 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; 323 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
316 if (is_float16_supported) { 324 if (is_float16_supported) {
317 float16_int8 = { 325 float16_int8 = {
@@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const {
597 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 605 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
598 } 606 }
599 } 607 }
608 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
609 demote.sType =
610 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
611 demote.pNext = nullptr;
612
600 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; 613 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
601 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; 614 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
615 robustness2.pNext = &demote;
602 616
603 VkPhysicalDeviceFeatures2KHR features2{}; 617 VkPhysicalDeviceFeatures2KHR features2{};
604 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 618 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
625 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 639 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
626 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 640 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
627 "shaderStorageImageWriteWithoutFormat"), 641 "shaderStorageImageWriteWithoutFormat"),
642 std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
628 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), 643 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
629 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), 644 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
630 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), 645 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
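For context on the chain built above, a standalone sketch of the
vkGetPhysicalDeviceFeatures2 query pattern: each extension's feature struct is linked
through pNext, and a single call fills them all in.

    VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
        .pNext = nullptr,
    };
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &demote,
    };
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    // demote.shaderDemoteToHelperInvocation now reports whether the device supports it.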