diff options
Diffstat (limited to 'src')
7 files changed, 140 insertions, 10 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "shader_recompiler/frontend/maxwell/decode.h" | 20 | #include "shader_recompiler/frontend/maxwell/decode.h" |
| 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" |
| 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" |
| 23 | #include "shader_recompiler/host_translate_info.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | 24 | #include "shader_recompiler/object_pool.h" |
| 24 | 25 | ||
| 25 | namespace Shader::Maxwell { | 26 | namespace Shader::Maxwell { |
| @@ -652,7 +653,7 @@ class TranslatePass { | |||
| 652 | public: | 653 | public: |
| 653 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, | 654 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, |
| 654 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, | 655 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, |
| 655 | IR::AbstractSyntaxList& syntax_list_) | 656 | IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) |
| 656 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, | 657 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, |
| 657 | syntax_list{syntax_list_} { | 658 | syntax_list{syntax_list_} { |
| 658 | Visit(root_stmt, nullptr, nullptr); | 659 | Visit(root_stmt, nullptr, nullptr); |
| @@ -660,6 +661,9 @@ public: | |||
| 660 | IR::Block& first_block{*syntax_list.front().data.block}; | 661 | IR::Block& first_block{*syntax_list.front().data.block}; |
| 661 | IR::IREmitter ir(first_block, first_block.begin()); | 662 | IR::IREmitter ir(first_block, first_block.begin()); |
| 662 | ir.Prologue(); | 663 | ir.Prologue(); |
| 664 | if (uses_demote_to_helper && host_info.needs_demote_reorder) { | ||
| 665 | DemoteCombinationPass(); | ||
| 666 | } | ||
| 663 | } | 667 | } |
| 664 | 668 | ||
| 665 | private: | 669 | private: |
| @@ -809,7 +813,14 @@ private: | |||
| 809 | } | 813 | } |
| 810 | case StatementType::Return: { | 814 | case StatementType::Return: { |
| 811 | ensure_block(); | 815 | ensure_block(); |
| 812 | IR::IREmitter{*current_block}.Epilogue(); | 816 | IR::Block* return_block{block_pool.Create(inst_pool)}; |
| 817 | IR::IREmitter{*return_block}.Epilogue(); | ||
| 818 | current_block->AddBranch(return_block); | ||
| 819 | |||
| 820 | auto& merge{syntax_list.emplace_back()}; | ||
| 821 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 822 | merge.data.block = return_block; | ||
| 823 | |||
| 813 | current_block = nullptr; | 824 | current_block = nullptr; |
| 814 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; | 825 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; |
| 815 | break; | 826 | break; |
| @@ -824,6 +835,7 @@ private: | |||
| 824 | auto& merge{syntax_list.emplace_back()}; | 835 | auto& merge{syntax_list.emplace_back()}; |
| 825 | merge.type = IR::AbstractSyntaxNode::Type::Block; | 836 | merge.type = IR::AbstractSyntaxNode::Type::Block; |
| 826 | merge.data.block = demote_block; | 837 | merge.data.block = demote_block; |
| 838 | uses_demote_to_helper = true; | ||
| 827 | break; | 839 | break; |
| 828 | } | 840 | } |
| 829 | case StatementType::Unreachable: { | 841 | case StatementType::Unreachable: { |
| @@ -855,11 +867,117 @@ private: | |||
| 855 | return block_pool.Create(inst_pool); | 867 | return block_pool.Create(inst_pool); |
| 856 | } | 868 | } |
| 857 | 869 | ||
| 870 | void DemoteCombinationPass() { | ||
| 871 | using Type = IR::AbstractSyntaxNode::Type; | ||
| 872 | std::vector<IR::Block*> demote_blocks; | ||
| 873 | std::vector<IR::U1> demote_conds; | ||
| 874 | u32 num_epilogues{}; | ||
| 875 | u32 branch_depth{}; | ||
| 876 | for (const IR::AbstractSyntaxNode& node : syntax_list) { | ||
| 877 | if (node.type == Type::If) { | ||
| 878 | ++branch_depth; | ||
| 879 | } | ||
| 880 | if (node.type == Type::EndIf) { | ||
| 881 | --branch_depth; | ||
| 882 | } | ||
| 883 | if (node.type != Type::Block) { | ||
| 884 | continue; | ||
| 885 | } | ||
| 886 | if (branch_depth > 1) { | ||
| 887 | // Skip reordering nested demote branches. | ||
| 888 | continue; | ||
| 889 | } | ||
| 890 | for (const IR::Inst& inst : node.data.block->Instructions()) { | ||
| 891 | const IR::Opcode op{inst.GetOpcode()}; | ||
| 892 | if (op == IR::Opcode::DemoteToHelperInvocation) { | ||
| 893 | demote_blocks.push_back(node.data.block); | ||
| 894 | break; | ||
| 895 | } | ||
| 896 | if (op == IR::Opcode::Epilogue) { | ||
| 897 | ++num_epilogues; | ||
| 898 | } | ||
| 899 | } | ||
| 900 | } | ||
| 901 | if (demote_blocks.size() == 0) { | ||
| 902 | return; | ||
| 903 | } | ||
| 904 | if (num_epilogues > 1) { | ||
| 905 | LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); | ||
| 906 | return; | ||
| 907 | } | ||
| 908 | s64 last_iterator_offset{}; | ||
| 909 | auto& asl{syntax_list}; | ||
| 910 | for (const IR::Block* demote_block : demote_blocks) { | ||
| 911 | const auto start_it{asl.begin() + last_iterator_offset}; | ||
| 912 | auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||
| 913 | return asn.type == Type::If && asn.data.if_node.body == demote_block; | ||
| 914 | })}; | ||
| 915 | if (asl_it == asl.end()) { | ||
| 916 | // Demote without a conditional branch. | ||
| 917 | // No need to proceed since all fragment instances will be demoted regardless. | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | const IR::Block* const end_if = asl_it->data.if_node.merge; | ||
| 921 | demote_conds.push_back(asl_it->data.if_node.cond); | ||
| 922 | last_iterator_offset = std::distance(asl.begin(), asl_it); | ||
| 923 | |||
| 924 | asl_it = asl.erase(asl_it); | ||
| 925 | asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||
| 926 | return asn.type == Type::Block && asn.data.block == demote_block; | ||
| 927 | }); | ||
| 928 | |||
| 929 | asl_it = asl.erase(asl_it); | ||
| 930 | asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||
| 931 | return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; | ||
| 932 | }); | ||
| 933 | asl_it = asl.erase(asl_it); | ||
| 934 | } | ||
| 935 | const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { | ||
| 936 | if (asn.type != Type::Block) { | ||
| 937 | return false; | ||
| 938 | } | ||
| 939 | for (const auto& inst : asn.data.block->Instructions()) { | ||
| 940 | if (inst.GetOpcode() == IR::Opcode::Epilogue) { | ||
| 941 | return true; | ||
| 942 | } | ||
| 943 | } | ||
| 944 | return false; | ||
| 945 | }}; | ||
| 946 | const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; | ||
| 947 | const auto return_block_it{(reverse_it + 1).base()}; | ||
| 948 | |||
| 949 | IR::IREmitter ir{*(return_block_it - 1)->data.block}; | ||
| 950 | IR::U1 cond(IR::Value(false)); | ||
| 951 | for (const auto& demote_cond : demote_conds) { | ||
| 952 | cond = ir.LogicalOr(cond, demote_cond); | ||
| 953 | } | ||
| 954 | cond.Inst()->DestructiveAddUsage(1); | ||
| 955 | |||
| 956 | IR::AbstractSyntaxNode demote_if_node{}; | ||
| 957 | demote_if_node.type = Type::If; | ||
| 958 | demote_if_node.data.if_node.cond = cond; | ||
| 959 | demote_if_node.data.if_node.body = demote_blocks[0]; | ||
| 960 | demote_if_node.data.if_node.merge = return_block_it->data.block; | ||
| 961 | |||
| 962 | IR::AbstractSyntaxNode demote_node{}; | ||
| 963 | demote_node.type = Type::Block; | ||
| 964 | demote_node.data.block = demote_blocks[0]; | ||
| 965 | |||
| 966 | IR::AbstractSyntaxNode demote_endif_node{}; | ||
| 967 | demote_endif_node.type = Type::EndIf; | ||
| 968 | demote_endif_node.data.end_if.merge = return_block_it->data.block; | ||
| 969 | |||
| 970 | asl.insert(return_block_it, demote_endif_node); | ||
| 971 | asl.insert(return_block_it, demote_node); | ||
| 972 | asl.insert(return_block_it, demote_if_node); | ||
| 973 | } | ||
| 974 | |||
| 858 | ObjectPool<Statement>& stmt_pool; | 975 | ObjectPool<Statement>& stmt_pool; |
| 859 | ObjectPool<IR::Inst>& inst_pool; | 976 | ObjectPool<IR::Inst>& inst_pool; |
| 860 | ObjectPool<IR::Block>& block_pool; | 977 | ObjectPool<IR::Block>& block_pool; |
| 861 | Environment& env; | 978 | Environment& env; |
| 862 | IR::AbstractSyntaxList& syntax_list; | 979 | IR::AbstractSyntaxList& syntax_list; |
| 980 | bool uses_demote_to_helper{}; | ||
| 863 | 981 | ||
| 864 | // TODO: C++20 Remove this when all compilers support constexpr std::vector | 982 | // TODO: C++20 Remove this when all compilers support constexpr std::vector |
| 865 | #if __cpp_lib_constexpr_vector >= 201907 | 983 | #if __cpp_lib_constexpr_vector >= 201907 |
| @@ -871,12 +989,13 @@ private: | |||
| 871 | } // Anonymous namespace | 989 | } // Anonymous namespace |
| 872 | 990 | ||
| 873 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 991 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| 874 | Environment& env, Flow::CFG& cfg) { | 992 | Environment& env, Flow::CFG& cfg, |
| 993 | const HostTranslateInfo& host_info) { | ||
| 875 | ObjectPool<Statement> stmt_pool{64}; | 994 | ObjectPool<Statement> stmt_pool{64}; |
| 876 | GotoPass goto_pass{cfg, stmt_pool}; | 995 | GotoPass goto_pass{cfg, stmt_pool}; |
| 877 | Statement& root{goto_pass.RootStatement()}; | 996 | Statement& root{goto_pass.RootStatement()}; |
| 878 | IR::AbstractSyntaxList syntax_list; | 997 | IR::AbstractSyntaxList syntax_list; |
| 879 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; | 998 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; |
| 880 | return syntax_list; | 999 | return syntax_list; |
| 881 | } | 1000 | } |
| 882 | 1001 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h | |||
| @@ -11,10 +11,13 @@ | |||
| 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" |
| 12 | #include "shader_recompiler/object_pool.h" | 12 | #include "shader_recompiler/object_pool.h" |
| 13 | 13 | ||
| 14 | namespace Shader::Maxwell { | 14 | namespace Shader { |
| 15 | struct HostTranslateInfo; | ||
| 16 | namespace Maxwell { | ||
| 15 | 17 | ||
| 16 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, | 18 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, |
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | 19 | ObjectPool<IR::Block>& block_pool, Environment& env, |
| 18 | Flow::CFG& cfg); | 20 | Flow::CFG& cfg, const HostTranslateInfo& host_info); |
| 19 | 21 | ||
| 20 | } // namespace Shader::Maxwell | 22 | } // namespace Maxwell |
| 23 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { | |||
| 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { |
| 132 | IR::Program program; | 132 | IR::Program program; |
| 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); |
| 134 | program.blocks = GenerateBlocks(program.syntax_list); | 134 | program.blocks = GenerateBlocks(program.syntax_list); |
| 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); | 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); |
| 136 | program.stage = env.ShaderStage(); | 136 | program.stage = env.ShaderStage(); |
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -11,8 +11,9 @@ namespace Shader { | |||
| 11 | 11 | ||
| 12 | /// Misc information about the host | 12 | /// Misc information about the host |
| 13 | struct HostTranslateInfo { | 13 | struct HostTranslateInfo { |
| 14 | bool support_float16{}; ///< True when the device supports 16-bit floats | 14 | bool support_float16{}; ///< True when the device supports 16-bit floats |
| 15 | bool support_int64{}; ///< True when the device supports 64-bit integers | 15 | bool support_int64{}; ///< True when the device supports 64-bit integers |
| 16 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered | ||
| 16 | }; | 17 | }; |
| 17 | 18 | ||
| 18 | } // namespace Shader | 19 | } // namespace Shader |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee992aed4..de9e41659 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -156,6 +156,10 @@ public: | |||
| 156 | return shader_backend; | 156 | return shader_backend; |
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | bool IsAmd() const { | ||
| 160 | return vendor_name == "ATI Technologies Inc."; | ||
| 161 | } | ||
| 162 | |||
| 159 | private: | 163 | private: |
| 160 | static bool TestVariableAoffi(); | 164 | static bool TestVariableAoffi(); |
| 161 | static bool TestPreciseBug(); | 165 | static bool TestPreciseBug(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f4dda17e..b0e14182e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 219 | host_info{ | 219 | host_info{ |
| 220 | .support_float16 = false, | 220 | .support_float16 = false, |
| 221 | .support_int64 = device.HasShaderInt64(), | 221 | .support_int64 = device.HasShaderInt64(), |
| 222 | .needs_demote_reorder = device.IsAmd(), | ||
| 222 | } { | 223 | } { |
| 223 | if (use_asynchronous_shaders) { | 224 | if (use_asynchronous_shaders) { |
| 224 | workers = CreateWorkers(); | 225 | workers = CreateWorkers(); |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f316c4f92..31bfbcb06 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw | |||
| 325 | host_info = Shader::HostTranslateInfo{ | 325 | host_info = Shader::HostTranslateInfo{ |
| 326 | .support_float16 = device.IsFloat16Supported(), | 326 | .support_float16 = device.IsFloat16Supported(), |
| 327 | .support_int64 = device.IsShaderInt64Supported(), | 327 | .support_int64 = device.IsShaderInt64Supported(), |
| 328 | .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || | ||
| 329 | driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, | ||
| 328 | }; | 330 | }; |
| 329 | } | 331 | } |
| 330 | 332 | ||