diff options
12 files changed, 180 insertions, 3 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 051e5d05a..151733090 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -162,6 +162,7 @@ add_library(shader_recompiler STATIC | |||
| 162 | ir_opt/collect_shader_info_pass.cpp | 162 | ir_opt/collect_shader_info_pass.cpp |
| 163 | ir_opt/constant_propagation_pass.cpp | 163 | ir_opt/constant_propagation_pass.cpp |
| 164 | ir_opt/dead_code_elimination_pass.cpp | 164 | ir_opt/dead_code_elimination_pass.cpp |
| 165 | ir_opt/dual_vertex_pass.cpp | ||
| 165 | ir_opt/global_memory_to_storage_buffer_pass.cpp | 166 | ir_opt/global_memory_to_storage_buffer_pass.cpp |
| 166 | ir_opt/identity_removal_pass.cpp | 167 | ir_opt/identity_removal_pass.cpp |
| 167 | ir_opt/lower_fp16_to_fp32.cpp | 168 | ir_opt/lower_fp16_to_fp32.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4562db45b..c352bbd84 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -25,6 +25,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal | |||
| 25 | void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); | 25 | void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); |
| 26 | void EmitSelectionMerge(EmitContext& ctx, Id merge_label); | 26 | void EmitSelectionMerge(EmitContext& ctx, Id merge_label); |
| 27 | void EmitReturn(EmitContext& ctx); | 27 | void EmitReturn(EmitContext& ctx); |
| 28 | void EmitJoin(EmitContext& ctx); | ||
| 28 | void EmitUnreachable(EmitContext& ctx); | 29 | void EmitUnreachable(EmitContext& ctx); |
| 29 | void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); | 30 | void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); |
| 30 | void EmitBarrier(EmitContext& ctx); | 31 | void EmitBarrier(EmitContext& ctx); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 335603f88..d3a1db340 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp | |||
| @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { | |||
| 26 | ctx.OpReturn(); | 26 | ctx.OpReturn(); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | void EmitJoin(EmitContext&) { | ||
| 30 | throw NotImplementedException("Join shouldn't be emitted"); | ||
| 31 | } | ||
| 32 | |||
| 29 | void EmitUnreachable(EmitContext& ctx) { | 33 | void EmitUnreachable(EmitContext& ctx) { |
| 30 | ctx.OpUnreachable(); | 34 | ctx.OpUnreachable(); |
| 31 | } | 35 | } |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5c1b02d53..dba902186 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept { | |||
| 61 | case Opcode::LoopMerge: | 61 | case Opcode::LoopMerge: |
| 62 | case Opcode::SelectionMerge: | 62 | case Opcode::SelectionMerge: |
| 63 | case Opcode::Return: | 63 | case Opcode::Return: |
| 64 | case Opcode::Join: | ||
| 64 | case Opcode::Unreachable: | 65 | case Opcode::Unreachable: |
| 65 | case Opcode::DemoteToHelperInvocation: | 66 | case Opcode::DemoteToHelperInvocation: |
| 66 | case Opcode::Barrier: | 67 | case Opcode::Barrier: |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8f32c9e74..b14719c51 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, | |||
| 13 | OPCODE(LoopMerge, Void, Label, Label, ) | 13 | OPCODE(LoopMerge, Void, Label, Label, ) |
| 14 | OPCODE(SelectionMerge, Void, Label, ) | 14 | OPCODE(SelectionMerge, Void, Label, ) |
| 15 | OPCODE(Return, Void, ) | 15 | OPCODE(Return, Void, ) |
| 16 | OPCODE(Join, Void, ) | ||
| 16 | OPCODE(Unreachable, Void, ) | 17 | OPCODE(Unreachable, Void, ) |
| 17 | OPCODE(DemoteToHelperInvocation, Void, Label, ) | 18 | OPCODE(DemoteToHelperInvocation, Void, Label, ) |
| 18 | 19 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aee96eae3..59897cb3e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 150 | return program; | 150 | return program; |
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 154 | Environment& env2) { | ||
| 155 | IR::Program program{}; | ||
| 156 | Optimization::VertexATransformPass(vertex_a); | ||
| 157 | Optimization::VertexBTransformPass(vertex_b); | ||
| 158 | program.blocks.swap(vertex_a.blocks); | ||
| 159 | for (IR::Block* block : vertex_b.blocks) { | ||
| 160 | program.blocks.push_back(block); | ||
| 161 | } | ||
| 162 | program.stage = Stage::VertexB; | ||
| 163 | program.info = vertex_a.info; | ||
| 164 | program.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); | ||
| 165 | |||
| 166 | for (size_t index = 0; index < 32; index++) { | ||
| 167 | program.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; | ||
| 168 | program.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; | ||
| 169 | } | ||
| 170 | Optimization::JoinTextureInfo(program.info, vertex_b.info); | ||
| 171 | Optimization::JoinStorageInfo(program.info, vertex_b.info); | ||
| 172 | Optimization::DualVertexJoinPass(program); | ||
| 173 | program.post_order_blocks = PostOrder(program.blocks); | ||
| 174 | Optimization::DeadCodeEliminationPass(program); | ||
| 175 | Optimization::IdentityRemovalPass(program); | ||
| 176 | Optimization::VerificationPass(program); | ||
| 177 | Optimization::CollectShaderInfoPass(env2, program); | ||
| 178 | return program; | ||
| 179 | } | ||
| 180 | |||
| 153 | } // namespace Shader::Maxwell | 181 | } // namespace Shader::Maxwell |
diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 542621a1d..6e5d5ddd0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h | |||
| @@ -21,4 +21,6 @@ namespace Shader::Maxwell { | |||
| 21 | ObjectPool<IR::Block>& block_pool, Environment& env, | 21 | ObjectPool<IR::Block>& block_pool, Environment& env, |
| 22 | Flow::CFG& cfg); | 22 | Flow::CFG& cfg); |
| 23 | 23 | ||
| 24 | [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 25 | Environment& env_vertex_b); | ||
| 24 | } // namespace Shader::Maxwell | 26 | } // namespace Shader::Maxwell |
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..f35c6478a --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <ranges> | ||
| 7 | #include <tuple> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "common/bit_cast.h" | ||
| 11 | #include "common/bit_util.h" | ||
| 12 | #include "shader_recompiler/exception.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 14 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 15 | |||
| 16 | namespace Shader::Optimization { | ||
| 17 | |||
| 18 | void VertexATransformPass(IR::Program& program) { | ||
| 19 | bool replaced_join{}; | ||
| 20 | bool eliminated_epilogue{}; | ||
| 21 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 22 | for (IR::Inst& inst : block->Instructions()) { | ||
| 23 | switch (inst.GetOpcode()) { | ||
| 24 | case IR::Opcode::Return: | ||
| 25 | inst.ReplaceOpcode(IR::Opcode::Join); | ||
| 26 | replaced_join = true; | ||
| 27 | break; | ||
| 28 | case IR::Opcode::Epilogue: | ||
| 29 | inst.Invalidate(); | ||
| 30 | eliminated_epilogue = true; | ||
| 31 | break; | ||
| 32 | default: | ||
| 33 | break; | ||
| 34 | } | ||
| 35 | if (replaced_join && eliminated_epilogue) { | ||
| 36 | return; | ||
| 37 | } | ||
| 38 | } | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | void VertexBTransformPass(IR::Program& program) { | ||
| 43 | for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { | ||
| 44 | for (IR::Inst& inst : block->Instructions()) { | ||
| 45 | if (inst.GetOpcode() == IR::Opcode::Prologue) { | ||
| 46 | return inst.Invalidate(); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | void DualVertexJoinPass(IR::Program& program) { | ||
| 53 | const auto& blocks = program.blocks; | ||
| 54 | s64 s = static_cast<s64>(blocks.size()) - 1; | ||
| 55 | if (s < 1) { | ||
| 56 | throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); | ||
| 57 | } | ||
| 58 | for (s64 index = 0; index < s; index++) { | ||
| 59 | IR::Block* const current_block = blocks[index]; | ||
| 60 | IR::Block* const next_block = blocks[index + 1]; | ||
| 61 | for (IR::Inst& inst : current_block->Instructions()) { | ||
| 62 | if (inst.GetOpcode() == IR::Opcode::Join) { | ||
| 63 | IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 64 | ir.Branch(next_block); | ||
| 65 | inst.Invalidate(); | ||
| 66 | // only 1 join should exist | ||
| 67 | return; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | } | ||
| 71 | throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); | ||
| 72 | } | ||
| 73 | |||
| 74 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 87eca2a0d..1d11a00d8 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 499 | } | 499 | } |
| 500 | } | 500 | } |
| 501 | 501 | ||
| 502 | template <typename Descriptors, typename Descriptor, typename Func> | ||
| 503 | static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { | ||
| 504 | // TODO: Handle arrays | ||
| 505 | const auto it{std::ranges::find_if(descriptors, pred)}; | ||
| 506 | if (it != descriptors.end()) { | ||
| 507 | return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||
| 508 | } | ||
| 509 | descriptors.push_back(desc); | ||
| 510 | return static_cast<u32>(descriptors.size()) - 1; | ||
| 511 | } | ||
| 512 | |||
| 513 | void JoinStorageInfo(Info& base, Info& source) { | ||
| 514 | auto& descriptors = base.storage_buffers_descriptors; | ||
| 515 | for (auto& desc : source.storage_buffers_descriptors) { | ||
| 516 | auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { | ||
| 517 | return desc.cbuf_index == existing.cbuf_index && | ||
| 518 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; | ||
| 519 | })}; | ||
| 520 | if (it != descriptors.end()) { | ||
| 521 | it->is_written |= desc.is_written; | ||
| 522 | continue; | ||
| 523 | } | ||
| 524 | descriptors.push_back(desc); | ||
| 525 | } | ||
| 526 | } | ||
| 527 | |||
| 502 | } // namespace Shader::Optimization | 528 | } // namespace Shader::Optimization |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 186104713..e9cb8546a 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program); | |||
| 22 | void TexturePass(Environment& env, IR::Program& program); | 22 | void TexturePass(Environment& env, IR::Program& program); |
| 23 | void VerificationPass(const IR::Program& program); | 23 | void VerificationPass(const IR::Program& program); |
| 24 | 24 | ||
| 25 | // Dual Vertex | ||
| 26 | void VertexATransformPass(IR::Program& program); | ||
| 27 | void VertexBTransformPass(IR::Program& program); | ||
| 28 | void DualVertexJoinPass(IR::Program& program); | ||
| 29 | void JoinTextureInfo(Info& base, Info& source); | ||
| 30 | void JoinStorageInfo(Info& base, Info& source); | ||
| 31 | |||
| 25 | } // namespace Shader::Optimization | 32 | } // namespace Shader::Optimization |
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index cfa6b34b9..2b38bcf42 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) { | |||
| 426 | } | 426 | } |
| 427 | } | 427 | } |
| 428 | 428 | ||
| 429 | void JoinTextureInfo(Info& base, Info& source) { | ||
| 430 | Descriptors descriptors{ | ||
| 431 | base.texture_buffer_descriptors, | ||
| 432 | base.image_buffer_descriptors, | ||
| 433 | base.texture_descriptors, | ||
| 434 | base.image_descriptors, | ||
| 435 | }; | ||
| 436 | for (auto& desc : source.texture_buffer_descriptors) { | ||
| 437 | descriptors.Add(desc); | ||
| 438 | } | ||
| 439 | for (auto& desc : source.image_buffer_descriptors) { | ||
| 440 | descriptors.Add(desc); | ||
| 441 | } | ||
| 442 | for (auto& desc : source.texture_descriptors) { | ||
| 443 | descriptors.Add(desc); | ||
| 444 | } | ||
| 445 | for (auto& desc : source.image_descriptors) { | ||
| 446 | descriptors.Add(desc); | ||
| 447 | } | ||
| 448 | } | ||
| 449 | |||
| 429 | } // namespace Shader::Optimization | 450 | } // namespace Shader::Optimization |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0822862fe..638475251 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); | |||
| 47 | 47 | ||
| 48 | namespace { | 48 | namespace { |
| 49 | using Shader::Backend::SPIRV::EmitSPIRV; | 49 | using Shader::Backend::SPIRV::EmitSPIRV; |
| 50 | using Shader::Maxwell::MergeDualVertexPrograms; | ||
| 50 | using Shader::Maxwell::TranslateProgram; | 51 | using Shader::Maxwell::TranslateProgram; |
| 51 | using VideoCommon::ComputeEnvironment; | 52 | using VideoCommon::ComputeEnvironment; |
| 52 | using VideoCommon::FileEnvironment; | 53 | using VideoCommon::FileEnvironment; |
| @@ -287,22 +288,32 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 287 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | 288 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 288 | size_t env_index{0}; | 289 | size_t env_index{0}; |
| 289 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | 290 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| 291 | bool uses_vertex_a{}; | ||
| 292 | std::size_t start_value_processing{}; | ||
| 290 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 293 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 291 | if (key.unique_hashes[index] == 0) { | 294 | if (key.unique_hashes[index] == 0) { |
| 292 | continue; | 295 | continue; |
| 293 | } | 296 | } |
| 297 | uses_vertex_a |= index == 0; | ||
| 294 | Shader::Environment& env{*envs[env_index]}; | 298 | Shader::Environment& env{*envs[env_index]}; |
| 295 | ++env_index; | 299 | ++env_index; |
| 296 | 300 | ||
| 297 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | 301 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; |
| 298 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); | 302 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); |
| 299 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | 303 | if (!uses_vertex_a || index != 1) { |
| 304 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||
| 305 | continue; | ||
| 306 | } | ||
| 307 | Shader::IR::Program& program_va{programs[0]}; | ||
| 308 | Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||
| 309 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||
| 310 | start_value_processing = 1; | ||
| 300 | } | 311 | } |
| 301 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | 312 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; |
| 302 | std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; | 313 | std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; |
| 303 | 314 | ||
| 304 | u32 binding{0}; | 315 | u32 binding{0}; |
| 305 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 316 | for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { |
| 306 | if (key.unique_hashes[index] == 0) { | 317 | if (key.unique_hashes[index] == 0) { |
| 307 | continue; | 318 | continue; |
| 308 | } | 319 | } |