summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp 127
-rw-r--r-- src/shader_recompiler/frontend/maxwell/structured_control_flow.h 9
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate_program.cpp 2
-rw-r--r-- src/shader_recompiler/host_translate_info.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 1
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 2
7 files changed, 140 insertions, 10 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 8b3e0a15c..69eeaa3e6 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
20#include "shader_recompiler/frontend/maxwell/decode.h" 20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" 21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h" 22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/host_translate_info.h"
23#include "shader_recompiler/object_pool.h" 24#include "shader_recompiler/object_pool.h"
24 25
25namespace Shader::Maxwell { 26namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
652public: 653public:
653 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, 654 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
654 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, 655 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
655 IR::AbstractSyntaxList& syntax_list_) 656 IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
656 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, 657 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
657 syntax_list{syntax_list_} { 658 syntax_list{syntax_list_} {
658 Visit(root_stmt, nullptr, nullptr); 659 Visit(root_stmt, nullptr, nullptr);
@@ -660,6 +661,9 @@ public:
660 IR::Block& first_block{*syntax_list.front().data.block}; 661 IR::Block& first_block{*syntax_list.front().data.block};
661 IR::IREmitter ir(first_block, first_block.begin()); 662 IR::IREmitter ir(first_block, first_block.begin());
662 ir.Prologue(); 663 ir.Prologue();
664 if (uses_demote_to_helper && host_info.needs_demote_reorder) {
665 DemoteCombinationPass();
666 }
663 } 667 }
664 668
665private: 669private:
@@ -809,7 +813,14 @@ private:
809 } 813 }
810 case StatementType::Return: { 814 case StatementType::Return: {
811 ensure_block(); 815 ensure_block();
812 IR::IREmitter{*current_block}.Epilogue(); 816 IR::Block* return_block{block_pool.Create(inst_pool)};
817 IR::IREmitter{*return_block}.Epilogue();
818 current_block->AddBranch(return_block);
819
820 auto& merge{syntax_list.emplace_back()};
821 merge.type = IR::AbstractSyntaxNode::Type::Block;
822 merge.data.block = return_block;
823
813 current_block = nullptr; 824 current_block = nullptr;
814 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; 825 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
815 break; 826 break;
@@ -824,6 +835,7 @@ private:
824 auto& merge{syntax_list.emplace_back()}; 835 auto& merge{syntax_list.emplace_back()};
825 merge.type = IR::AbstractSyntaxNode::Type::Block; 836 merge.type = IR::AbstractSyntaxNode::Type::Block;
826 merge.data.block = demote_block; 837 merge.data.block = demote_block;
838 uses_demote_to_helper = true;
827 break; 839 break;
828 } 840 }
829 case StatementType::Unreachable: { 841 case StatementType::Unreachable: {
@@ -855,11 +867,117 @@ private:
855 return block_pool.Create(inst_pool); 867 return block_pool.Create(inst_pool);
856 } 868 }
857 869
870 void DemoteCombinationPass() {
871 using Type = IR::AbstractSyntaxNode::Type;
872 std::vector<IR::Block*> demote_blocks;
873 std::vector<IR::U1> demote_conds;
874 u32 num_epilogues{};
875 u32 branch_depth{};
876 for (const IR::AbstractSyntaxNode& node : syntax_list) {
877 if (node.type == Type::If) {
878 ++branch_depth;
879 }
880 if (node.type == Type::EndIf) {
881 --branch_depth;
882 }
883 if (node.type != Type::Block) {
884 continue;
885 }
886 if (branch_depth > 1) {
887 // Skip reordering nested demote branches.
888 continue;
889 }
890 for (const IR::Inst& inst : node.data.block->Instructions()) {
891 const IR::Opcode op{inst.GetOpcode()};
892 if (op == IR::Opcode::DemoteToHelperInvocation) {
893 demote_blocks.push_back(node.data.block);
894 break;
895 }
896 if (op == IR::Opcode::Epilogue) {
897 ++num_epilogues;
898 }
899 }
900 }
901 if (demote_blocks.size() == 0) {
902 return;
903 }
904 if (num_epilogues > 1) {
905 LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
906 return;
907 }
908 s64 last_iterator_offset{};
909 auto& asl{syntax_list};
910 for (const IR::Block* demote_block : demote_blocks) {
911 const auto start_it{asl.begin() + last_iterator_offset};
912 auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
913 return asn.type == Type::If && asn.data.if_node.body == demote_block;
914 })};
915 if (asl_it == asl.end()) {
916 // Demote without a conditional branch.
917 // No need to proceed since all fragment instances will be demoted regardless.
918 return;
919 }
920 const IR::Block* const end_if = asl_it->data.if_node.merge;
921 demote_conds.push_back(asl_it->data.if_node.cond);
922 last_iterator_offset = std::distance(asl.begin(), asl_it);
923
924 asl_it = asl.erase(asl_it);
925 asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
926 return asn.type == Type::Block && asn.data.block == demote_block;
927 });
928
929 asl_it = asl.erase(asl_it);
930 asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
931 return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
932 });
933 asl_it = asl.erase(asl_it);
934 }
935 const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
936 if (asn.type != Type::Block) {
937 return false;
938 }
939 for (const auto& inst : asn.data.block->Instructions()) {
940 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
941 return true;
942 }
943 }
944 return false;
945 }};
946 const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
947 const auto return_block_it{(reverse_it + 1).base()};
948
949 IR::IREmitter ir{*(return_block_it - 1)->data.block};
950 IR::U1 cond(IR::Value(false));
951 for (const auto& demote_cond : demote_conds) {
952 cond = ir.LogicalOr(cond, demote_cond);
953 }
954 cond.Inst()->DestructiveAddUsage(1);
955
956 IR::AbstractSyntaxNode demote_if_node{};
957 demote_if_node.type = Type::If;
958 demote_if_node.data.if_node.cond = cond;
959 demote_if_node.data.if_node.body = demote_blocks[0];
960 demote_if_node.data.if_node.merge = return_block_it->data.block;
961
962 IR::AbstractSyntaxNode demote_node{};
963 demote_node.type = Type::Block;
964 demote_node.data.block = demote_blocks[0];
965
966 IR::AbstractSyntaxNode demote_endif_node{};
967 demote_endif_node.type = Type::EndIf;
968 demote_endif_node.data.end_if.merge = return_block_it->data.block;
969
970 asl.insert(return_block_it, demote_endif_node);
971 asl.insert(return_block_it, demote_node);
972 asl.insert(return_block_it, demote_if_node);
973 }
974
858 ObjectPool<Statement>& stmt_pool; 975 ObjectPool<Statement>& stmt_pool;
859 ObjectPool<IR::Inst>& inst_pool; 976 ObjectPool<IR::Inst>& inst_pool;
860 ObjectPool<IR::Block>& block_pool; 977 ObjectPool<IR::Block>& block_pool;
861 Environment& env; 978 Environment& env;
862 IR::AbstractSyntaxList& syntax_list; 979 IR::AbstractSyntaxList& syntax_list;
980 bool uses_demote_to_helper{};
863 981
864// TODO: C++20 Remove this when all compilers support constexpr std::vector 982// TODO: C++20 Remove this when all compilers support constexpr std::vector
865#if __cpp_lib_constexpr_vector >= 201907 983#if __cpp_lib_constexpr_vector >= 201907
@@ -871,12 +989,13 @@ private:
871} // Anonymous namespace 989} // Anonymous namespace
872 990
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 991IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) { 992 Environment& env, Flow::CFG& cfg,
993 const HostTranslateInfo& host_info) {
875 ObjectPool<Statement> stmt_pool{64}; 994 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool}; 995 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()}; 996 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list; 997 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; 998 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
880 return syntax_list; 999 return syntax_list;
881} 1000}
882 1001
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index 88b083649..e38158da3 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
11#include "shader_recompiler/frontend/maxwell/control_flow.h" 11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h" 12#include "shader_recompiler/object_pool.h"
13 13
14namespace Shader::Maxwell { 14namespace Shader {
15struct HostTranslateInfo;
16namespace Maxwell {
15 17
16[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, 18[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env, 19 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg); 20 Flow::CFG& cfg, const HostTranslateInfo& host_info);
19 21
20} // namespace Shader::Maxwell 22} // namespace Maxwell
23} // namespace Shader
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index c067d459c..012d55357 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { 131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program; 132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); 133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
134 program.blocks = GenerateBlocks(program.syntax_list); 134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front()); 135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage(); 136 program.stage = env.ShaderStage();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 94a584219..96468b2e7 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {
11 11
12/// Misc information about the host 12/// Misc information about the host
13struct HostTranslateInfo { 13struct HostTranslateInfo {
14 bool support_float16{}; ///< True when the device supports 16-bit floats 14 bool support_float16{}; ///< True when the device supports 16-bit floats
15 bool support_int64{}; ///< True when the device supports 64-bit integers 15 bool support_int64{}; ///< True when the device supports 64-bit integers
16 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16}; 17};
17 18
18} // namespace Shader 19} // namespace Shader
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee992aed4..de9e41659 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
156 return shader_backend; 156 return shader_backend;
157 } 157 }
158 158
159 bool IsAmd() const {
160 return vendor_name == "ATI Technologies Inc.";
161 }
162
159private: 163private:
160 static bool TestVariableAoffi(); 164 static bool TestVariableAoffi();
161 static bool TestPreciseBug(); 165 static bool TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1f4dda17e..b0e14182e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
219 host_info{ 219 host_info{
220 .support_float16 = false, 220 .support_float16 = false,
221 .support_int64 = device.HasShaderInt64(), 221 .support_int64 = device.HasShaderInt64(),
222 .needs_demote_reorder = device.IsAmd(),
222 } { 223 } {
223 if (use_asynchronous_shaders) { 224 if (use_asynchronous_shaders) {
224 workers = CreateWorkers(); 225 workers = CreateWorkers();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f316c4f92..31bfbcb06 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
325 host_info = Shader::HostTranslateInfo{ 325 host_info = Shader::HostTranslateInfo{
326 .support_float16 = device.IsFloat16Supported(), 326 .support_float16 = device.IsFloat16Supported(),
327 .support_int64 = device.IsShaderInt64Supported(), 327 .support_int64 = device.IsShaderInt64Supported(),
328 .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
329 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
328 }; 330 };
329} 331}
330 332