diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 476 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.h | 63 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 177 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 58 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 11 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 27 |
15 files changed, 774 insertions, 122 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2554add28..2b4266f29 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 57 | "${VIDEO_CORE}/shader/decode/video.cpp" | 57 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 59 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 60 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 59 | "${VIDEO_CORE}/shader/decode.cpp" | 61 | "${VIDEO_CORE}/shader/decode.cpp" |
| 60 | "${VIDEO_CORE}/shader/node.h" | 62 | "${VIDEO_CORE}/shader/node.h" |
| 61 | "${VIDEO_CORE}/shader/node_helper.cpp" | 63 | "${VIDEO_CORE}/shader/node_helper.cpp" |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe71..cd32c65d3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -103,6 +103,8 @@ add_library(video_core STATIC | |||
| 103 | shader/decode/video.cpp | 103 | shader/decode/video.cpp |
| 104 | shader/decode/xmad.cpp | 104 | shader/decode/xmad.cpp |
| 105 | shader/decode/other.cpp | 105 | shader/decode/other.cpp |
| 106 | shader/control_flow.cpp | ||
| 107 | shader/control_flow.h | ||
| 106 | shader/decode.cpp | 108 | shader/decode.cpp |
| 107 | shader/node_helper.cpp | 109 | shader/node_helper.cpp |
| 108 | shader/node_helper.h | 110 | shader/node_helper.h |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 404d4f5aa..c3055602b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -1368,6 +1368,20 @@ union Instruction { | |||
| 1368 | } bra; | 1368 | } bra; |
| 1369 | 1369 | ||
| 1370 | union { | 1370 | union { |
| 1371 | BitField<20, 24, u64> target; | ||
| 1372 | BitField<5, 1, u64> constant_buffer; | ||
| 1373 | |||
| 1374 | s32 GetBranchExtend() const { | ||
| 1375 | // Sign extend the branch target offset | ||
| 1376 | u32 mask = 1U << (24 - 1); | ||
| 1377 | u32 value = static_cast<u32>(target); | ||
| 1378 | // The branch offset is relative to the next instruction and is stored in bytes, so | ||
| 1379 | // divide it by the size of an instruction and add 1 to it. | ||
| 1380 | return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | ||
| 1381 | } | ||
| 1382 | } brx; | ||
| 1383 | |||
| 1384 | union { | ||
| 1371 | BitField<39, 1, u64> emit; // EmitVertex | 1385 | BitField<39, 1, u64> emit; // EmitVertex |
| 1372 | BitField<40, 1, u64> cut; // EndPrimitive | 1386 | BitField<40, 1, u64> cut; // EndPrimitive |
| 1373 | } out; | 1387 | } out; |
| @@ -1464,6 +1478,7 @@ public: | |||
| 1464 | BFE_IMM, | 1478 | BFE_IMM, |
| 1465 | BFI_IMM_R, | 1479 | BFI_IMM_R, |
| 1466 | BRA, | 1480 | BRA, |
| 1481 | BRX, | ||
| 1467 | PBK, | 1482 | PBK, |
| 1468 | LD_A, | 1483 | LD_A, |
| 1469 | LD_L, | 1484 | LD_L, |
| @@ -1738,6 +1753,7 @@ private: | |||
| 1738 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), | 1753 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), |
| 1739 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), | 1754 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), |
| 1740 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | 1755 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), |
| 1756 | INST("111000100101----", Id::BRX, Type::Flow, "BRX"), | ||
| 1741 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), | 1757 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), |
| 1742 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), | 1758 | INST("111000110100---", Id::BRK, Type::Flow, "BRK"), |
| 1743 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 1759 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f9b2b03a0..5d76ee12d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -129,9 +129,11 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 129 | 129 | ||
| 130 | /// Hashes one (or two) program streams | 130 | /// Hashes one (or two) program streams |
| 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, |
| 132 | const ProgramCode& code_b) { | 132 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 133 | u64 unique_identifier = | 133 | if (size_a == 0) { |
| 134 | Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); | 134 | size_a = CalculateProgramSize(code); |
| 135 | } | ||
| 136 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | ||
| 135 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 137 | if (program_type != Maxwell::ShaderProgram::VertexA) { |
| 136 | return unique_identifier; | 138 | return unique_identifier; |
| 137 | } | 139 | } |
| @@ -140,8 +142,11 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 140 | std::size_t seed = 0; | 142 | std::size_t seed = 0; |
| 141 | boost::hash_combine(seed, unique_identifier); | 143 | boost::hash_combine(seed, unique_identifier); |
| 142 | 144 | ||
| 143 | const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), | 145 | if (size_b == 0) { |
| 144 | CalculateProgramSize(code_b)); | 146 | size_b = CalculateProgramSize(code_b); |
| 147 | } | ||
| 148 | const u64 identifier_b = | ||
| 149 | Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||
| 145 | boost::hash_combine(seed, identifier_b); | 150 | boost::hash_combine(seed, identifier_b); |
| 146 | return static_cast<u64>(seed); | 151 | return static_cast<u64>(seed); |
| 147 | } | 152 | } |
| @@ -150,14 +155,17 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 150 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 155 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, |
| 151 | ProgramCode program_code, ProgramCode program_code_b) { | 156 | ProgramCode program_code, ProgramCode program_code_b) { |
| 152 | GLShader::ShaderSetup setup(program_code); | 157 | GLShader::ShaderSetup setup(program_code); |
| 158 | setup.program.size_a = CalculateProgramSize(program_code); | ||
| 159 | setup.program.size_b = 0; | ||
| 153 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 160 | if (program_type == Maxwell::ShaderProgram::VertexA) { |
| 154 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 161 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 155 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 162 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 156 | // stage here. | 163 | // stage here. |
| 157 | setup.SetProgramB(program_code_b); | 164 | setup.SetProgramB(program_code_b); |
| 165 | setup.program.size_b = CalculateProgramSize(program_code_b); | ||
| 158 | } | 166 | } |
| 159 | setup.program.unique_identifier = | 167 | setup.program.unique_identifier = GetUniqueIdentifier( |
| 160 | GetUniqueIdentifier(program_type, program_code, program_code_b); | 168 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 161 | 169 | ||
| 162 | switch (program_type) { | 170 | switch (program_type) { |
| 163 | case Maxwell::ShaderProgram::VertexA: | 171 | case Maxwell::ShaderProgram::VertexA: |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5f2f1510c..bfc975a04 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -191,10 +191,12 @@ public: | |||
| 191 | 191 | ||
| 192 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | 192 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 193 | // unlikely that shaders will use 20 nested SSYs and PBKs. | 193 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 194 | constexpr u32 FLOW_STACK_SIZE = 20; | 194 | if (!ir.IsFlowStackDisabled()) { |
| 195 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | 195 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 196 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | 196 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { |
| 197 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | 197 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); |
| 198 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||
| 199 | } | ||
| 198 | } | 200 | } |
| 199 | 201 | ||
| 200 | code.AddLine("while (true) {{"); | 202 | code.AddLine("while (true) {{"); |
| @@ -1555,6 +1557,14 @@ private: | |||
| 1555 | return {}; | 1557 | return {}; |
| 1556 | } | 1558 | } |
| 1557 | 1559 | ||
| 1560 | std::string BranchIndirect(Operation operation) { | ||
| 1561 | const std::string op_a = VisitOperand(operation, 0, Type::Uint); | ||
| 1562 | |||
| 1563 | code.AddLine("jmp_to = {};", op_a); | ||
| 1564 | code.AddLine("break;"); | ||
| 1565 | return {}; | ||
| 1566 | } | ||
| 1567 | |||
| 1558 | std::string PushFlowStack(Operation operation) { | 1568 | std::string PushFlowStack(Operation operation) { |
| 1559 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | 1569 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); |
| 1560 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1570 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| @@ -1789,6 +1799,7 @@ private: | |||
| 1789 | &GLSLDecompiler::ImageStore, | 1799 | &GLSLDecompiler::ImageStore, |
| 1790 | 1800 | ||
| 1791 | &GLSLDecompiler::Branch, | 1801 | &GLSLDecompiler::Branch, |
| 1802 | &GLSLDecompiler::BranchIndirect, | ||
| 1792 | &GLSLDecompiler::PushFlowStack, | 1803 | &GLSLDecompiler::PushFlowStack, |
| 1793 | &GLSLDecompiler::PopFlowStack, | 1804 | &GLSLDecompiler::PopFlowStack, |
| 1794 | &GLSLDecompiler::Exit, | 1805 | &GLSLDecompiler::Exit, |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9148629ec..f9ee8429e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -29,14 +29,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 29 | }; |
| 30 | 30 | ||
| 31 | )"; | 31 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 33 | ProgramResult program = | 33 | ProgramResult program = |
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); |
| 35 | 35 | ||
| 36 | out += program.first; | 36 | out += program.first; |
| 37 | 37 | ||
| 38 | if (setup.IsDualProgram()) { | 38 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 40 | ProgramResult program_b = |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); |
| 42 | 42 | ||
| @@ -80,7 +80,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 80 | }; |
| 81 | 81 | ||
| 82 | )"; | 82 | )"; |
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 84 | ProgramResult program = | 84 | ProgramResult program = |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); |
| 86 | out += program.first; | 86 | out += program.first; |
| @@ -115,7 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 115 | }; | 115 | }; |
| 116 | 116 | ||
| 117 | )"; | 117 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 119 | ProgramResult program = |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); |
| 121 | 121 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0536c8a03..7cbc590f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -27,6 +27,8 @@ struct ShaderSetup { | |||
| 27 | ProgramCode code; | 27 | ProgramCode code; |
| 28 | ProgramCode code_b; // Used for dual vertex shaders | 28 | ProgramCode code_b; // Used for dual vertex shaders |
| 29 | u64 unique_identifier; | 29 | u64 unique_identifier; |
| 30 | std::size_t size_a; | ||
| 31 | std::size_t size_b; | ||
| 30 | } program; | 32 | } program; |
| 31 | 33 | ||
| 32 | /// Used in scenarios where we have a dual vertex shaders | 34 | /// Used in scenarios where we have a dual vertex shaders |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97ce214b1..1bb04607b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -949,6 +949,14 @@ private: | |||
| 949 | return {}; | 949 | return {}; |
| 950 | } | 950 | } |
| 951 | 951 | ||
| 952 | Id BranchIndirect(Operation operation) { | ||
| 953 | const Id op_a = VisitOperand<Type::Uint>(operation, 0); | ||
| 954 | |||
| 955 | Emit(OpStore(jmp_to, op_a)); | ||
| 956 | BranchingOp([&]() { Emit(OpBranch(continue_label)); }); | ||
| 957 | return {}; | ||
| 958 | } | ||
| 959 | |||
| 952 | Id PushFlowStack(Operation operation) { | 960 | Id PushFlowStack(Operation operation) { |
| 953 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 961 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 954 | ASSERT(target); | 962 | ASSERT(target); |
| @@ -1334,6 +1342,7 @@ private: | |||
| 1334 | &SPIRVDecompiler::ImageStore, | 1342 | &SPIRVDecompiler::ImageStore, |
| 1335 | 1343 | ||
| 1336 | &SPIRVDecompiler::Branch, | 1344 | &SPIRVDecompiler::Branch, |
| 1345 | &SPIRVDecompiler::BranchIndirect, | ||
| 1337 | &SPIRVDecompiler::PushFlowStack, | 1346 | &SPIRVDecompiler::PushFlowStack, |
| 1338 | &SPIRVDecompiler::PopFlowStack, | 1347 | &SPIRVDecompiler::PopFlowStack, |
| 1339 | &SPIRVDecompiler::Exit, | 1348 | &SPIRVDecompiler::Exit, |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 000000000..fdcc970ff --- /dev/null +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -0,0 +1,476 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <stack> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <unordered_set> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/shader/control_flow.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | using Tegra::Shader::Instruction; | ||
| 20 | using Tegra::Shader::OpCode; | ||
| 21 | |||
| 22 | constexpr s32 unassigned_branch = -2; | ||
| 23 | |||
| 24 | struct Query { | ||
| 25 | u32 address{}; | ||
| 26 | std::stack<u32> ssy_stack{}; | ||
| 27 | std::stack<u32> pbk_stack{}; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct BlockStack { | ||
| 31 | BlockStack() = default; | ||
| 32 | BlockStack(const BlockStack& b) = default; | ||
| 33 | BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||
| 34 | std::stack<u32> ssy_stack{}; | ||
| 35 | std::stack<u32> pbk_stack{}; | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct BlockBranchInfo { | ||
| 39 | Condition condition{}; | ||
| 40 | s32 address{exit_branch}; | ||
| 41 | bool kill{}; | ||
| 42 | bool is_sync{}; | ||
| 43 | bool is_brk{}; | ||
| 44 | bool ignore{}; | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct BlockInfo { | ||
| 48 | u32 start{}; | ||
| 49 | u32 end{}; | ||
| 50 | bool visited{}; | ||
| 51 | BlockBranchInfo branch{}; | ||
| 52 | |||
| 53 | bool IsInside(const u32 address) const { | ||
| 54 | return start <= address && address <= end; | ||
| 55 | } | ||
| 56 | }; | ||
| 57 | |||
| 58 | struct CFGRebuildState { | ||
| 59 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | ||
| 60 | const u32 start) | ||
| 61 | : program_code{program_code}, program_size{program_size}, start{start} {} | ||
| 62 | |||
| 63 | u32 start{}; | ||
| 64 | std::vector<BlockInfo> block_info{}; | ||
| 65 | std::list<u32> inspect_queries{}; | ||
| 66 | std::list<Query> queries{}; | ||
| 67 | std::unordered_map<u32, u32> registered{}; | ||
| 68 | std::unordered_set<u32> labels{}; | ||
| 69 | std::map<u32, u32> ssy_labels{}; | ||
| 70 | std::map<u32, u32> pbk_labels{}; | ||
| 71 | std::unordered_map<u32, BlockStack> stacks{}; | ||
| 72 | const ProgramCode& program_code; | ||
| 73 | const std::size_t program_size; | ||
| 74 | }; | ||
| 75 | |||
| 76 | enum class BlockCollision : u32 { None, Found, Inside }; | ||
| 77 | |||
| 78 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 79 | const auto& blocks = state.block_info; | ||
| 80 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 81 | if (blocks[index].start == address) { | ||
| 82 | return {BlockCollision::Found, index}; | ||
| 83 | } | ||
| 84 | if (blocks[index].IsInside(address)) { | ||
| 85 | return {BlockCollision::Inside, index}; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | return {BlockCollision::None, -1}; | ||
| 89 | } | ||
| 90 | |||
| 91 | struct ParseInfo { | ||
| 92 | BlockBranchInfo branch_info{}; | ||
| 93 | u32 end_address{}; | ||
| 94 | }; | ||
| 95 | |||
| 96 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 97 | auto& it = state.block_info.emplace_back(); | ||
| 98 | it.start = start; | ||
| 99 | it.end = end; | ||
| 100 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 101 | state.registered.insert({start, index}); | ||
| 102 | return it; | ||
| 103 | } | ||
| 104 | |||
| 105 | Pred GetPredicate(u32 index, bool negated) { | ||
| 106 | return static_cast<Pred>(index + (negated ? 8 : 0)); | ||
| 107 | } | ||
| 108 | |||
| 109 | /** | ||
| 110 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 111 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 112 | */ | ||
| 113 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 114 | constexpr u32 SchedPeriod = 4; | ||
| 115 | u32 absolute_offset = offset - main_offset; | ||
| 116 | |||
| 117 | return (absolute_offset % SchedPeriod) == 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | enum class ParseResult : u32 { | ||
| 121 | ControlCaught, | ||
| 122 | BlockEnd, | ||
| 123 | AbnormalFlow, | ||
| 124 | }; | ||
| 125 | |||
| 126 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 127 | u32 offset = static_cast<u32>(address); | ||
| 128 | const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | ||
| 129 | ParseInfo parse_info{}; | ||
| 130 | |||
| 131 | const auto insert_label = [](CFGRebuildState& state, u32 address) { | ||
| 132 | const auto pair = state.labels.emplace(address); | ||
| 133 | if (pair.second) { | ||
| 134 | state.inspect_queries.push_back(address); | ||
| 135 | } | ||
| 136 | }; | ||
| 137 | |||
| 138 | while (true) { | ||
| 139 | if (offset >= end_address) { | ||
| 140 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 141 | ASSERT_MSG(false, "Shader passed the current limit!"); | ||
| 142 | parse_info.branch_info.address = exit_branch; | ||
| 143 | parse_info.branch_info.ignore = false; | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | if (state.registered.count(offset) != 0) { | ||
| 147 | parse_info.branch_info.address = offset; | ||
| 148 | parse_info.branch_info.ignore = true; | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | if (IsSchedInstruction(offset, state.start)) { | ||
| 152 | offset++; | ||
| 153 | continue; | ||
| 154 | } | ||
| 155 | const Instruction instr = {state.program_code[offset]}; | ||
| 156 | const auto opcode = OpCode::Decode(instr); | ||
| 157 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 158 | offset++; | ||
| 159 | continue; | ||
| 160 | } | ||
| 161 | |||
| 162 | switch (opcode->get().GetId()) { | ||
| 163 | case OpCode::Id::EXIT: { | ||
| 164 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 165 | parse_info.branch_info.condition.predicate = | ||
| 166 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 167 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 168 | offset++; | ||
| 169 | continue; | ||
| 170 | } | ||
| 171 | const ConditionCode cc = instr.flow_condition_code; | ||
| 172 | parse_info.branch_info.condition.cc = cc; | ||
| 173 | if (cc == ConditionCode::F) { | ||
| 174 | offset++; | ||
| 175 | continue; | ||
| 176 | } | ||
| 177 | parse_info.branch_info.address = exit_branch; | ||
| 178 | parse_info.branch_info.kill = false; | ||
| 179 | parse_info.branch_info.is_sync = false; | ||
| 180 | parse_info.branch_info.is_brk = false; | ||
| 181 | parse_info.branch_info.ignore = false; | ||
| 182 | parse_info.end_address = offset; | ||
| 183 | |||
| 184 | return {ParseResult::ControlCaught, parse_info}; | ||
| 185 | } | ||
| 186 | case OpCode::Id::BRA: { | ||
| 187 | if (instr.bra.constant_buffer != 0) { | ||
| 188 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 189 | } | ||
| 190 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 191 | parse_info.branch_info.condition.predicate = | ||
| 192 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 193 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 194 | offset++; | ||
| 195 | continue; | ||
| 196 | } | ||
| 197 | const ConditionCode cc = instr.flow_condition_code; | ||
| 198 | parse_info.branch_info.condition.cc = cc; | ||
| 199 | if (cc == ConditionCode::F) { | ||
| 200 | offset++; | ||
| 201 | continue; | ||
| 202 | } | ||
| 203 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 204 | if (branch_offset == 0) { | ||
| 205 | parse_info.branch_info.address = exit_branch; | ||
| 206 | } else { | ||
| 207 | parse_info.branch_info.address = branch_offset; | ||
| 208 | } | ||
| 209 | insert_label(state, branch_offset); | ||
| 210 | parse_info.branch_info.kill = false; | ||
| 211 | parse_info.branch_info.is_sync = false; | ||
| 212 | parse_info.branch_info.is_brk = false; | ||
| 213 | parse_info.branch_info.ignore = false; | ||
| 214 | parse_info.end_address = offset; | ||
| 215 | |||
| 216 | return {ParseResult::ControlCaught, parse_info}; | ||
| 217 | } | ||
| 218 | case OpCode::Id::SYNC: { | ||
| 219 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 220 | parse_info.branch_info.condition.predicate = | ||
| 221 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 222 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 223 | offset++; | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | const ConditionCode cc = instr.flow_condition_code; | ||
| 227 | parse_info.branch_info.condition.cc = cc; | ||
| 228 | if (cc == ConditionCode::F) { | ||
| 229 | offset++; | ||
| 230 | continue; | ||
| 231 | } | ||
| 232 | parse_info.branch_info.address = unassigned_branch; | ||
| 233 | parse_info.branch_info.kill = false; | ||
| 234 | parse_info.branch_info.is_sync = true; | ||
| 235 | parse_info.branch_info.is_brk = false; | ||
| 236 | parse_info.branch_info.ignore = false; | ||
| 237 | parse_info.end_address = offset; | ||
| 238 | |||
| 239 | return {ParseResult::ControlCaught, parse_info}; | ||
| 240 | } | ||
| 241 | case OpCode::Id::BRK: { | ||
| 242 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 243 | parse_info.branch_info.condition.predicate = | ||
| 244 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 245 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 246 | offset++; | ||
| 247 | continue; | ||
| 248 | } | ||
| 249 | const ConditionCode cc = instr.flow_condition_code; | ||
| 250 | parse_info.branch_info.condition.cc = cc; | ||
| 251 | if (cc == ConditionCode::F) { | ||
| 252 | offset++; | ||
| 253 | continue; | ||
| 254 | } | ||
| 255 | parse_info.branch_info.address = unassigned_branch; | ||
| 256 | parse_info.branch_info.kill = false; | ||
| 257 | parse_info.branch_info.is_sync = false; | ||
| 258 | parse_info.branch_info.is_brk = true; | ||
| 259 | parse_info.branch_info.ignore = false; | ||
| 260 | parse_info.end_address = offset; | ||
| 261 | |||
| 262 | return {ParseResult::ControlCaught, parse_info}; | ||
| 263 | } | ||
| 264 | case OpCode::Id::KIL: { | ||
| 265 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 266 | parse_info.branch_info.condition.predicate = | ||
| 267 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 268 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 269 | offset++; | ||
| 270 | continue; | ||
| 271 | } | ||
| 272 | const ConditionCode cc = instr.flow_condition_code; | ||
| 273 | parse_info.branch_info.condition.cc = cc; | ||
| 274 | if (cc == ConditionCode::F) { | ||
| 275 | offset++; | ||
| 276 | continue; | ||
| 277 | } | ||
| 278 | parse_info.branch_info.address = exit_branch; | ||
| 279 | parse_info.branch_info.kill = true; | ||
| 280 | parse_info.branch_info.is_sync = false; | ||
| 281 | parse_info.branch_info.is_brk = false; | ||
| 282 | parse_info.branch_info.ignore = false; | ||
| 283 | parse_info.end_address = offset; | ||
| 284 | |||
| 285 | return {ParseResult::ControlCaught, parse_info}; | ||
| 286 | } | ||
| 287 | case OpCode::Id::SSY: { | ||
| 288 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 289 | insert_label(state, target); | ||
| 290 | state.ssy_labels.emplace(offset, target); | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | case OpCode::Id::PBK: { | ||
| 294 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 295 | insert_label(state, target); | ||
| 296 | state.pbk_labels.emplace(offset, target); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | case OpCode::Id::BRX: { | ||
| 300 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 301 | } | ||
| 302 | default: | ||
| 303 | break; | ||
| 304 | } | ||
| 305 | |||
| 306 | offset++; | ||
| 307 | } | ||
| 308 | parse_info.branch_info.kill = false; | ||
| 309 | parse_info.branch_info.is_sync = false; | ||
| 310 | parse_info.branch_info.is_brk = false; | ||
| 311 | parse_info.end_address = offset - 1; | ||
| 312 | return {ParseResult::BlockEnd, parse_info}; | ||
| 313 | } | ||
| 314 | |||
| 315 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 316 | if (state.inspect_queries.empty()) { | ||
| 317 | return false; | ||
| 318 | } | ||
| 319 | |||
| 320 | const u32 address = state.inspect_queries.front(); | ||
| 321 | state.inspect_queries.pop_front(); | ||
| 322 | const auto [result, block_index] = TryGetBlock(state, address); | ||
| 323 | switch (result) { | ||
| 324 | case BlockCollision::Found: { | ||
| 325 | return true; | ||
| 326 | } | ||
| 327 | case BlockCollision::Inside: { | ||
| 328 | // This case is the tricky one: | ||
| 329 | // We need to Split the block in 2 sepparate blocks | ||
| 330 | const u32 end = state.block_info[block_index].end; | ||
| 331 | BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||
| 332 | BlockInfo& current_block = state.block_info[block_index]; | ||
| 333 | current_block.end = address - 1; | ||
| 334 | new_block.branch = current_block.branch; | ||
| 335 | BlockBranchInfo forward_branch{}; | ||
| 336 | forward_branch.address = address; | ||
| 337 | forward_branch.ignore = true; | ||
| 338 | current_block.branch = forward_branch; | ||
| 339 | return true; | ||
| 340 | } | ||
| 341 | default: | ||
| 342 | break; | ||
| 343 | } | ||
| 344 | const auto [parse_result, parse_info] = ParseCode(state, address); | ||
| 345 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 346 | // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction | ||
| 347 | return false; | ||
| 348 | } | ||
| 349 | |||
| 350 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 351 | block_info.branch = parse_info.branch_info; | ||
| 352 | if (parse_info.branch_info.condition.IsUnconditional()) { | ||
| 353 | return true; | ||
| 354 | } | ||
| 355 | |||
| 356 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 357 | state.inspect_queries.push_front(fallthrough_address); | ||
| 358 | return true; | ||
| 359 | } | ||
| 360 | |||
| 361 | bool TryQuery(CFGRebuildState& state) { | ||
| 362 | const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||
| 363 | BlockInfo& block) { | ||
| 364 | auto gather_start = labels.lower_bound(block.start); | ||
| 365 | const auto gather_end = labels.upper_bound(block.end); | ||
| 366 | while (gather_start != gather_end) { | ||
| 367 | cc.push(gather_start->second); | ||
| 368 | gather_start++; | ||
| 369 | } | ||
| 370 | }; | ||
| 371 | if (state.queries.empty()) { | ||
| 372 | return false; | ||
| 373 | } | ||
| 374 | Query& q = state.queries.front(); | ||
| 375 | const u32 block_index = state.registered[q.address]; | ||
| 376 | BlockInfo& block = state.block_info[block_index]; | ||
| 377 | // If the block is visted, check if the stacks match, else gather the ssy/pbk | ||
| 378 | // labels into the current stack and look if the branch at the end of the block | ||
| 379 | // consumes a label. Schedule new queries accordingly | ||
| 380 | if (block.visited) { | ||
| 381 | BlockStack& stack = state.stacks[q.address]; | ||
| 382 | const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && | ||
| 383 | (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); | ||
| 384 | state.queries.pop_front(); | ||
| 385 | return all_okay; | ||
| 386 | } | ||
| 387 | block.visited = true; | ||
| 388 | state.stacks[q.address] = BlockStack{q}; | ||
| 389 | Query q2(q); | ||
| 390 | state.queries.pop_front(); | ||
| 391 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||
| 392 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||
| 393 | if (!block.branch.condition.IsUnconditional()) { | ||
| 394 | q2.address = block.end + 1; | ||
| 395 | state.queries.push_back(q2); | ||
| 396 | } | ||
| 397 | Query conditional_query{q2}; | ||
| 398 | if (block.branch.is_sync) { | ||
| 399 | if (block.branch.address == unassigned_branch) { | ||
| 400 | block.branch.address = conditional_query.ssy_stack.top(); | ||
| 401 | } | ||
| 402 | conditional_query.ssy_stack.pop(); | ||
| 403 | } | ||
| 404 | if (block.branch.is_brk) { | ||
| 405 | if (block.branch.address == unassigned_branch) { | ||
| 406 | block.branch.address = conditional_query.pbk_stack.top(); | ||
| 407 | } | ||
| 408 | conditional_query.pbk_stack.pop(); | ||
| 409 | } | ||
| 410 | conditional_query.address = block.branch.address; | ||
| 411 | state.queries.push_back(conditional_query); | ||
| 412 | return true; | ||
| 413 | } | ||
| 414 | |||
| 415 | std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | ||
| 416 | u32 start_address) { | ||
| 417 | CFGRebuildState state{program_code, program_size, start_address}; | ||
| 418 | // Inspect Code and generate blocks | ||
| 419 | state.labels.clear(); | ||
| 420 | state.labels.emplace(start_address); | ||
| 421 | state.inspect_queries.push_back(state.start); | ||
| 422 | while (!state.inspect_queries.empty()) { | ||
| 423 | if (!TryInspectAddress(state)) { | ||
| 424 | return {}; | ||
| 425 | } | ||
| 426 | } | ||
| 427 | // Decompile Stacks | ||
| 428 | Query start_query{}; | ||
| 429 | start_query.address = state.start; | ||
| 430 | state.queries.push_back(start_query); | ||
| 431 | bool decompiled = true; | ||
| 432 | while (!state.queries.empty()) { | ||
| 433 | if (!TryQuery(state)) { | ||
| 434 | decompiled = false; | ||
| 435 | break; | ||
| 436 | } | ||
| 437 | } | ||
| 438 | // Sort and organize results | ||
| 439 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 440 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | ||
| 441 | ShaderCharacteristics result_out{}; | ||
| 442 | result_out.decompilable = decompiled; | ||
| 443 | result_out.start = start_address; | ||
| 444 | result_out.end = start_address; | ||
| 445 | for (auto& block : state.block_info) { | ||
| 446 | ShaderBlock new_block{}; | ||
| 447 | new_block.start = block.start; | ||
| 448 | new_block.end = block.end; | ||
| 449 | new_block.ignore_branch = block.branch.ignore; | ||
| 450 | if (!new_block.ignore_branch) { | ||
| 451 | new_block.branch.cond = block.branch.condition; | ||
| 452 | new_block.branch.kills = block.branch.kill; | ||
| 453 | new_block.branch.address = block.branch.address; | ||
| 454 | } | ||
| 455 | result_out.end = std::max(result_out.end, block.end); | ||
| 456 | result_out.blocks.push_back(new_block); | ||
| 457 | } | ||
| 458 | if (result_out.decompilable) { | ||
| 459 | result_out.labels = std::move(state.labels); | ||
| 460 | return {result_out}; | ||
| 461 | } | ||
| 462 | // If it's not decompilable, merge the unlabelled blocks together | ||
| 463 | auto back = result_out.blocks.begin(); | ||
| 464 | auto next = std::next(back); | ||
| 465 | while (next != result_out.blocks.end()) { | ||
| 466 | if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { | ||
| 467 | back->end = next->end; | ||
| 468 | next = result_out.blocks.erase(next); | ||
| 469 | continue; | ||
| 470 | } | ||
| 471 | back = next; | ||
| 472 | next++; | ||
| 473 | } | ||
| 474 | return {result_out}; | ||
| 475 | } | ||
| 476 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 000000000..5e8ea3271 --- /dev/null +++ b/src/video_core/shader/control_flow.h | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstring> | ||
| 8 | #include <list> | ||
| 9 | #include <optional> | ||
| 10 | #include <unordered_set> | ||
| 11 | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Shader::ConditionCode; | ||
| 18 | using Tegra::Shader::Pred; | ||
| 19 | |||
| 20 | constexpr s32 exit_branch = -1; | ||
| 21 | |||
| 22 | struct Condition { | ||
| 23 | Pred predicate{Pred::UnusedIndex}; | ||
| 24 | ConditionCode cc{ConditionCode::T}; | ||
| 25 | |||
| 26 | bool IsUnconditional() const { | ||
| 27 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 28 | } | ||
| 29 | bool operator==(const Condition& other) const { | ||
| 30 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 31 | } | ||
| 32 | }; | ||
| 33 | |||
| 34 | struct ShaderBlock { | ||
| 35 | u32 start{}; | ||
| 36 | u32 end{}; | ||
| 37 | bool ignore_branch{}; | ||
| 38 | struct Branch { | ||
| 39 | Condition cond{}; | ||
| 40 | bool kills{}; | ||
| 41 | s32 address{}; | ||
| 42 | bool operator==(const Branch& b) const { | ||
| 43 | return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); | ||
| 44 | } | ||
| 45 | } branch{}; | ||
| 46 | bool operator==(const ShaderBlock& sb) const { | ||
| 47 | return std::tie(start, end, ignore_branch, branch) == | ||
| 48 | std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); | ||
| 49 | } | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct ShaderCharacteristics { | ||
| 53 | std::list<ShaderBlock> blocks{}; | ||
| 54 | bool decompilable{}; | ||
| 55 | u32 start{}; | ||
| 56 | u32 end{}; | ||
| 57 | std::unordered_set<u32> labels{}; | ||
| 58 | }; | ||
| 59 | |||
| 60 | std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | ||
| 61 | u32 start_address); | ||
| 62 | |||
| 63 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..29c8895c5 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -21,20 +22,6 @@ using Tegra::Shader::OpCode; | |||
| 21 | 22 | ||
| 22 | namespace { | 23 | namespace { |
| 23 | 24 | ||
| 24 | /// Merges exit method of two parallel branches. | ||
| 25 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 26 | if (a == ExitMethod::Undetermined) { | ||
| 27 | return b; | ||
| 28 | } | ||
| 29 | if (b == ExitMethod::Undetermined) { | ||
| 30 | return a; | ||
| 31 | } | ||
| 32 | if (a == b) { | ||
| 33 | return a; | ||
| 34 | } | ||
| 35 | return ExitMethod::Conditional; | ||
| 36 | } | ||
| 37 | |||
| 38 | /** | 25 | /** |
| 39 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | 26 | * Returns whether the instruction at the specified offset is a 'sched' instruction. |
| 40 | * Sched instructions always appear before a sequence of 3 instructions. | 27 | * Sched instructions always appear before a sequence of 3 instructions. |
| @@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 38 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 39 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 40 | ||
| 54 | std::set<u32> labels; | 41 | disable_flow_stack = false; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 42 | const auto info = ScanFlow(program_code, program_size, main_offset); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 43 | if (info) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 44 | const auto& shader_info = *info; |
| 58 | } | 45 | coverage_begin = shader_info.start; |
| 59 | 46 | coverage_end = shader_info.end; | |
| 60 | if (labels.empty()) { | 47 | if (shader_info.decompilable) { |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = ([this](NodeBlock& nodes, u32 label) { | ||
| 50 | if (label == exit_branch) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | basic_blocks.insert({label, nodes}); | ||
| 54 | }); | ||
| 55 | const auto& blocks = shader_info.blocks; | ||
| 56 | NodeBlock current_block; | ||
| 57 | u32 current_label = exit_branch; | ||
| 58 | for (auto& block : blocks) { | ||
| 59 | if (shader_info.labels.count(block.start) != 0) { | ||
| 60 | insert_block(current_block, current_label); | ||
| 61 | current_block.clear(); | ||
| 62 | current_label = block.start; | ||
| 63 | } | ||
| 64 | if (!block.ignore_branch) { | ||
| 65 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 66 | InsertControlFlow(current_block, block); | ||
| 67 | } else { | ||
| 68 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | insert_block(current_block, current_label); | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||
| 75 | // we can't decompile it, fallback to standard method | ||
| 76 | for (const auto& block : shader_info.blocks) { | ||
| 77 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 78 | } | ||
| 62 | return; | 79 | return; |
| 63 | } | 80 | } |
| 81 | LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||
| 82 | |||
| 83 | // Now we need to deal with an undecompilable shader. We need to brute force | ||
| 84 | // a shader that captures every position. | ||
| 85 | coverage_begin = main_offset; | ||
| 86 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 87 | coverage_end = shader_end; | ||
| 88 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 89 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 90 | } | ||
| 91 | } | ||
| 64 | 92 | ||
| 65 | labels.insert(main_offset); | 93 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { |
| 66 | 94 | NodeBlock basic_block; | |
| 67 | for (const u32 label : labels) { | 95 | DecodeRangeInner(basic_block, begin, end); |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 96 | return basic_block; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 97 | } |
| 70 | 98 | ||
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 99 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { |
| 100 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 101 | pc = DecodeInstr(bb, pc); | ||
| 72 | } | 102 | } |
| 73 | } | 103 | } |
| 74 | 104 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 76 | const auto [iter, inserted] = | 106 | const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { |
| 77 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | 107 | Node result = n; |
| 78 | ExitMethod& exit_method = iter->second; | 108 | if (cond.cc != ConditionCode::T) { |
| 79 | if (!inserted) | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| 80 | return exit_method; | ||
| 81 | |||
| 82 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 83 | coverage_begin = std::min(coverage_begin, offset); | ||
| 84 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 85 | |||
| 86 | const Instruction instr = {program_code[offset]}; | ||
| 87 | const auto opcode = OpCode::Decode(instr); | ||
| 88 | if (!opcode) | ||
| 89 | continue; | ||
| 90 | switch (opcode->get().GetId()) { | ||
| 91 | case OpCode::Id::EXIT: { | ||
| 92 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 93 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 94 | // and check the exit method of that other basic block. | ||
| 95 | using Tegra::Shader::Pred; | ||
| 96 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 97 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 98 | } else { | ||
| 99 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 100 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 101 | } | ||
| 102 | } | 110 | } |
| 103 | case OpCode::Id::BRA: { | 111 | if (cond.predicate != Pred::UnusedIndex) { |
| 104 | const u32 target = offset + instr.bra.GetBranchTarget(); | 112 | u32 pred = static_cast<u32>(cond.predicate); |
| 105 | labels.insert(target); | 113 | const bool is_neg = pred > 7; |
| 106 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | 114 | if (is_neg) { |
| 107 | const ExitMethod jmp = Scan(target, end, labels); | 115 | pred -= 8; |
| 108 | return exit_method = ParallelExit(no_jmp, jmp); | 116 | } |
| 109 | } | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 110 | case OpCode::Id::SSY: | ||
| 111 | case OpCode::Id::PBK: { | ||
| 112 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 113 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 114 | "Constant buffer branching is not supported"); | ||
| 115 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 116 | labels.insert(target); | ||
| 117 | // Continue scanning for an exit method. | ||
| 118 | break; | ||
| 119 | } | 118 | } |
| 120 | default: | 119 | return result; |
| 121 | break; | 120 | }); |
| 121 | if (block.branch.address < 0) { | ||
| 122 | if (block.branch.kills) { | ||
| 123 | Node n = Operation(OperationCode::Discard); | ||
| 124 | n = apply_conditions(block.branch.cond, n); | ||
| 125 | bb.push_back(n); | ||
| 126 | global_code.push_back(n); | ||
| 127 | return; | ||
| 122 | } | 128 | } |
| 129 | Node n = Operation(OperationCode::Exit); | ||
| 130 | n = apply_conditions(block.branch.cond, n); | ||
| 131 | bb.push_back(n); | ||
| 132 | global_code.push_back(n); | ||
| 133 | return; | ||
| 123 | } | 134 | } |
| 124 | return exit_method = ExitMethod::AlwaysReturn; | 135 | Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); |
| 125 | } | 136 | n = apply_conditions(block.branch.cond, n); |
| 126 | 137 | bb.push_back(n); | |
| 127 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | 138 | global_code.push_back(n); |
| 128 | NodeBlock basic_block; | ||
| 129 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 130 | pc = DecodeInstr(basic_block, pc); | ||
| 131 | } | ||
| 132 | return basic_block; | ||
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | 141 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |
| @@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 140 | 146 | ||
| 141 | const Instruction instr = {program_code[pc]}; | 147 | const Instruction instr = {program_code[pc]}; |
| 142 | const auto opcode = OpCode::Decode(instr); | 148 | const auto opcode = OpCode::Decode(instr); |
| 149 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 143 | 150 | ||
| 144 | // Decoding failure | 151 | // Decoding failure |
| 145 | if (!opcode) { | 152 | if (!opcode) { |
| 146 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | 153 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); |
| 154 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 155 | nv_address, instr.value))); | ||
| 147 | return pc + 1; | 156 | return pc + 1; |
| 148 | } | 157 | } |
| 149 | 158 | ||
| 150 | bb.push_back( | 159 | bb.push_back(Comment( |
| 151 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | 160 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); |
| 152 | 161 | ||
| 153 | using Tegra::Shader::Pred; | 162 | using Tegra::Shader::Pred; |
| 154 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | 163 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46a8ab82..42e3de02f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -91,11 +91,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 91 | break; | 91 | break; |
| 92 | } | 92 | } |
| 93 | case OpCode::Id::BRA: { | 93 | case OpCode::Id::BRA: { |
| 94 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 94 | Node branch; |
| 95 | "BRA with constant buffers are not implemented"); | 95 | if (instr.bra.constant_buffer == 0) { |
| 96 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 97 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 98 | } else { | ||
| 99 | const u32 target = pc + 1; | ||
| 100 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 101 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 102 | PRECISE, op_a, Immediate(3)); | ||
| 103 | const Node operand = | ||
| 104 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 105 | branch = Operation(OperationCode::BranchIndirect, convert); | ||
| 106 | } | ||
| 96 | 107 | ||
| 97 | const u32 target = pc + instr.bra.GetBranchTarget(); | 108 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 98 | const Node branch = Operation(OperationCode::Branch, Immediate(target)); | 109 | if (cc != Tegra::Shader::ConditionCode::T) { |
| 110 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 111 | } else { | ||
| 112 | bb.push_back(branch); | ||
| 113 | } | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | case OpCode::Id::BRX: { | ||
| 117 | Node operand; | ||
| 118 | if (instr.brx.constant_buffer != 0) { | ||
| 119 | const s32 target = pc + 1; | ||
| 120 | const Node index = GetRegister(instr.gpr8); | ||
| 121 | const Node op_a = | ||
| 122 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 123 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 124 | PRECISE, op_a, Immediate(3)); | ||
| 125 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 126 | } else { | ||
| 127 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 128 | const Node op_a = GetRegister(instr.gpr8); | ||
| 129 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 130 | PRECISE, op_a, Immediate(3)); | ||
| 131 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 132 | } | ||
| 133 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 99 | 134 | ||
| 100 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 135 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 101 | if (cc != Tegra::Shader::ConditionCode::T) { | 136 | if (cc != Tegra::Shader::ConditionCode::T) { |
| @@ -109,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 144 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 110 | "Constant buffer flow is not supported"); | 145 | "Constant buffer flow is not supported"); |
| 111 | 146 | ||
| 147 | if (disable_flow_stack) { | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | |||
| 112 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | 151 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. |
| 113 | const u32 target = pc + instr.bra.GetBranchTarget(); | 152 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 114 | bb.push_back( | 153 | bb.push_back( |
| @@ -119,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 119 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 158 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 120 | "Constant buffer PBK is not supported"); | 159 | "Constant buffer PBK is not supported"); |
| 121 | 160 | ||
| 161 | if (disable_flow_stack) { | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | |||
| 122 | // PBK pushes to a stack the address where BRK will jump to. | 165 | // PBK pushes to a stack the address where BRK will jump to. |
| 123 | const u32 target = pc + instr.bra.GetBranchTarget(); | 166 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 124 | bb.push_back( | 167 | bb.push_back( |
| @@ -130,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 130 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | 173 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", |
| 131 | static_cast<u32>(cc)); | 174 | static_cast<u32>(cc)); |
| 132 | 175 | ||
| 176 | if (disable_flow_stack) { | ||
| 177 | break; | ||
| 178 | } | ||
| 179 | |||
| 133 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 180 | // The SYNC opcode jumps to the address previously set by the SSY opcode |
| 134 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | 181 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); |
| 135 | break; | 182 | break; |
| @@ -138,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 138 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 185 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 139 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | 186 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", |
| 140 | static_cast<u32>(cc)); | 187 | static_cast<u32>(cc)); |
| 188 | if (disable_flow_stack) { | ||
| 189 | break; | ||
| 190 | } | ||
| 141 | 191 | ||
| 142 | // The BRK opcode jumps to the address previously set by the PBK opcode | 192 | // The BRK opcode jumps to the address previously set by the PBK opcode |
| 143 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | 193 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 0ac83fcf0..7427ed896 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -148,11 +148,12 @@ enum class OperationCode { | |||
| 148 | 148 | ||
| 149 | ImageStore, /// (MetaImage, float[N] coords) -> void | 149 | ImageStore, /// (MetaImage, float[N] coords) -> void |
| 150 | 150 | ||
| 151 | Branch, /// (uint branch_target) -> void | 151 | Branch, /// (uint branch_target) -> void |
| 152 | PushFlowStack, /// (uint branch_target) -> void | 152 | BranchIndirect, /// (uint branch_target) -> void |
| 153 | PopFlowStack, /// () -> void | 153 | PushFlowStack, /// (uint branch_target) -> void |
| 154 | Exit, /// () -> void | 154 | PopFlowStack, /// () -> void |
| 155 | Discard, /// () -> void | 155 | Exit, /// () -> void |
| 156 | Discard, /// () -> void | ||
| 156 | 157 | ||
| 157 | EmitVertex, /// () -> void | 158 | EmitVertex, /// () -> void |
| 158 | EndPrimitive, /// () -> void | 159 | EndPrimitive, /// () -> void |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 11b545cca..5994bfc4e 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 22 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 23 | using Tegra::Shader::Register; |
| 24 | 24 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) | 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) |
| 26 | : program_code{program_code}, main_offset{main_offset} { | 26 | : program_code{program_code}, main_offset{main_offset}, program_size{size} { |
| 27 | Decode(); | 27 | Decode(); |
| 28 | } | 28 | } |
| 29 | 29 | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e22548208..6145f0a70 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -22,18 +22,12 @@ | |||
| 22 | 22 | ||
| 23 | namespace VideoCommon::Shader { | 23 | namespace VideoCommon::Shader { |
| 24 | 24 | ||
| 25 | struct ShaderBlock; | ||
| 26 | |||
| 25 | using ProgramCode = std::vector<u64>; | 27 | using ProgramCode = std::vector<u64>; |
| 26 | 28 | ||
| 27 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | 29 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |
| 28 | 30 | ||
| 29 | /// Describes the behaviour of code path of a given entry point and a return point. | ||
| 30 | enum class ExitMethod { | ||
| 31 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. | ||
| 32 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 33 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 34 | AlwaysEnd, ///< All code paths reach a END instruction. | ||
| 35 | }; | ||
| 36 | |||
| 37 | class ConstBuffer { | 31 | class ConstBuffer { |
| 38 | public: | 32 | public: |
| 39 | explicit ConstBuffer(u32 max_offset, bool is_indirect) | 33 | explicit ConstBuffer(u32 max_offset, bool is_indirect) |
| @@ -73,7 +67,7 @@ struct GlobalMemoryUsage { | |||
| 73 | 67 | ||
| 74 | class ShaderIR final { | 68 | class ShaderIR final { |
| 75 | public: | 69 | public: |
| 76 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); | 70 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); |
| 77 | ~ShaderIR(); | 71 | ~ShaderIR(); |
| 78 | 72 | ||
| 79 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 73 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -129,12 +123,20 @@ public: | |||
| 129 | return header; | 123 | return header; |
| 130 | } | 124 | } |
| 131 | 125 | ||
| 126 | bool IsFlowStackDisabled() const { | ||
| 127 | return disable_flow_stack; | ||
| 128 | } | ||
| 129 | |||
| 130 | u32 ConvertAddressToNvidiaSpace(const u32 address) const { | ||
| 131 | return (address - main_offset) * sizeof(Tegra::Shader::Instruction); | ||
| 132 | } | ||
| 133 | |||
| 132 | private: | 134 | private: |
| 133 | void Decode(); | 135 | void Decode(); |
| 134 | 136 | ||
| 135 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); | ||
| 136 | |||
| 137 | NodeBlock DecodeRange(u32 begin, u32 end); | 137 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 138 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 139 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 138 | 140 | ||
| 139 | /** | 141 | /** |
| 140 | * Decodes a single instruction from Tegra to IR. | 142 | * Decodes a single instruction from Tegra to IR. |
| @@ -326,10 +328,11 @@ private: | |||
| 326 | 328 | ||
| 327 | const ProgramCode& program_code; | 329 | const ProgramCode& program_code; |
| 328 | const u32 main_offset; | 330 | const u32 main_offset; |
| 331 | const std::size_t program_size; | ||
| 332 | bool disable_flow_stack{}; | ||
| 329 | 333 | ||
| 330 | u32 coverage_begin{}; | 334 | u32 coverage_begin{}; |
| 331 | u32 coverage_end{}; | 335 | u32 coverage_end{}; |
| 332 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 333 | 336 | ||
| 334 | std::map<u32, NodeBlock> basic_blocks; | 337 | std::map<u32, NodeBlock> basic_blocks; |
| 335 | NodeBlock global_code; | 338 | NodeBlock global_code; |