diff options
| author | 2019-01-25 23:42:14 -0500 | |
|---|---|---|
| committer | 2019-01-25 23:42:14 -0500 | |
| commit | 1f4ca1e841cd0b0427218d787efe10a3fa62df33 (patch) | |
| tree | 00cc1743c6a6ba593e3b56897b13c2272a71d779 /src | |
| parent | Merge pull request #2054 from bunnei/scope-context-refactor (diff) | |
| parent | shader_ir: Fixup clang build (diff) | |
| download | yuzu-1f4ca1e841cd0b0427218d787efe10a3fa62df33.tar.gz yuzu-1f4ca1e841cd0b0427218d787efe10a3fa62df33.tar.xz yuzu-1f4ca1e841cd0b0427218d787efe10a3fa62df33.zip | |
Merge pull request #1927 from ReinUsesLisp/shader-ir
video_core: Replace gl_shader_decompiler with an IR based decompiler
Diffstat (limited to 'src')
39 files changed, 5497 insertions, 3808 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 327db68a5..509ca117a 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -59,6 +59,34 @@ add_library(video_core STATIC | |||
| 59 | renderer_opengl/renderer_opengl.h | 59 | renderer_opengl/renderer_opengl.h |
| 60 | renderer_opengl/utils.cpp | 60 | renderer_opengl/utils.cpp |
| 61 | renderer_opengl/utils.h | 61 | renderer_opengl/utils.h |
| 62 | shader/decode/arithmetic.cpp | ||
| 63 | shader/decode/arithmetic_immediate.cpp | ||
| 64 | shader/decode/bfe.cpp | ||
| 65 | shader/decode/bfi.cpp | ||
| 66 | shader/decode/shift.cpp | ||
| 67 | shader/decode/arithmetic_integer.cpp | ||
| 68 | shader/decode/arithmetic_integer_immediate.cpp | ||
| 69 | shader/decode/arithmetic_half.cpp | ||
| 70 | shader/decode/arithmetic_half_immediate.cpp | ||
| 71 | shader/decode/ffma.cpp | ||
| 72 | shader/decode/hfma2.cpp | ||
| 73 | shader/decode/conversion.cpp | ||
| 74 | shader/decode/memory.cpp | ||
| 75 | shader/decode/float_set_predicate.cpp | ||
| 76 | shader/decode/integer_set_predicate.cpp | ||
| 77 | shader/decode/half_set_predicate.cpp | ||
| 78 | shader/decode/predicate_set_register.cpp | ||
| 79 | shader/decode/predicate_set_predicate.cpp | ||
| 80 | shader/decode/register_set_predicate.cpp | ||
| 81 | shader/decode/float_set.cpp | ||
| 82 | shader/decode/integer_set.cpp | ||
| 83 | shader/decode/half_set.cpp | ||
| 84 | shader/decode/video.cpp | ||
| 85 | shader/decode/xmad.cpp | ||
| 86 | shader/decode/other.cpp | ||
| 87 | shader/decode.cpp | ||
| 88 | shader/shader_ir.cpp | ||
| 89 | shader/shader_ir.h | ||
| 62 | surface.cpp | 90 | surface.cpp |
| 63 | surface.h | 91 | surface.h |
| 64 | textures/astc.cpp | 92 | textures/astc.cpp |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e53c77f2b..cdef97bc6 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -397,6 +397,10 @@ struct IpaMode { | |||
| 397 | bool operator!=(const IpaMode& a) const { | 397 | bool operator!=(const IpaMode& a) const { |
| 398 | return !operator==(a); | 398 | return !operator==(a); |
| 399 | } | 399 | } |
| 400 | bool operator<(const IpaMode& a) const { | ||
| 401 | return std::tie(interpolation_mode, sampling_mode) < | ||
| 402 | std::tie(a.interpolation_mode, a.sampling_mode); | ||
| 403 | } | ||
| 400 | }; | 404 | }; |
| 401 | 405 | ||
| 402 | enum class SystemVariable : u64 { | 406 | enum class SystemVariable : u64 { |
| @@ -644,6 +648,7 @@ union Instruction { | |||
| 644 | BitField<37, 2, HalfPrecision> precision; | 648 | BitField<37, 2, HalfPrecision> precision; |
| 645 | BitField<32, 1, u64> saturate; | 649 | BitField<32, 1, u64> saturate; |
| 646 | 650 | ||
| 651 | BitField<31, 1, u64> negate_b; | ||
| 647 | BitField<30, 1, u64> negate_c; | 652 | BitField<30, 1, u64> negate_c; |
| 648 | BitField<35, 2, HalfType> type_c; | 653 | BitField<35, 2, HalfType> type_c; |
| 649 | } rr; | 654 | } rr; |
| @@ -1431,6 +1436,7 @@ public: | |||
| 1431 | PredicateSetRegister, | 1436 | PredicateSetRegister, |
| 1432 | RegisterSetPredicate, | 1437 | RegisterSetPredicate, |
| 1433 | Conversion, | 1438 | Conversion, |
| 1439 | Video, | ||
| 1434 | Xmad, | 1440 | Xmad, |
| 1435 | Unknown, | 1441 | Unknown, |
| 1436 | }; | 1442 | }; |
| @@ -1562,8 +1568,8 @@ private: | |||
| 1562 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1568 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1563 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1569 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1564 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1570 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| 1565 | INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), | 1571 | INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), |
| 1566 | INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"), | 1572 | INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), |
| 1567 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | 1573 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), |
| 1568 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | 1574 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), |
| 1569 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | 1575 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index 99c34649f..cf2b76ff6 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h | |||
| @@ -106,7 +106,7 @@ struct Header { | |||
| 106 | } ps; | 106 | } ps; |
| 107 | }; | 107 | }; |
| 108 | 108 | ||
| 109 | u64 GetLocalMemorySize() { | 109 | u64 GetLocalMemorySize() const { |
| 110 | return (common1.shader_local_memory_low_size | | 110 | return (common1.shader_local_memory_low_size | |
| 111 | (common2.shader_local_memory_high_size << 24)); | 111 | (common2.shader_local_memory_high_size << 24)); |
| 112 | } | 112 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6600ad528..71829fee0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -930,7 +930,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 930 | const auto& gpu = Core::System::GetInstance().GPU(); | 930 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 931 | const auto& maxwell3d = gpu.Maxwell3D(); | 931 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 932 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; | 932 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; |
| 933 | const auto& entries = shader->GetShaderEntries().const_buffer_entries; | 933 | const auto& entries = shader->GetShaderEntries().const_buffers; |
| 934 | 934 | ||
| 935 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; | 935 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; |
| 936 | std::array<GLuint, max_binds> bind_buffers; | 936 | std::array<GLuint, max_binds> bind_buffers; |
| @@ -998,7 +998,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | |||
| 998 | MICROPROFILE_SCOPE(OpenGL_Texture); | 998 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 999 | const auto& gpu = Core::System::GetInstance().GPU(); | 999 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 1000 | const auto& maxwell3d = gpu.Maxwell3D(); | 1000 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 1001 | const auto& entries = shader->GetShaderEntries().texture_samplers; | 1001 | const auto& entries = shader->GetShaderEntries().samplers; |
| 1002 | 1002 | ||
| 1003 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), | 1003 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), |
| 1004 | "Exceeded the number of active textures."); | 1004 | "Exceeded the number of active textures."); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c785fffa3..b3aca39af 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -10,11 +10,15 @@ | |||
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/utils.h" | 15 | #include "video_core/renderer_opengl/utils.h" |
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 15 | 17 | ||
| 16 | namespace OpenGL { | 18 | namespace OpenGL { |
| 17 | 19 | ||
| 20 | using VideoCommon::Shader::ProgramCode; | ||
| 21 | |||
| 18 | /// Gets the address for the specified shader stage program | 22 | /// Gets the address for the specified shader stage program |
| 19 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 23 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 20 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 24 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| @@ -24,8 +28,8 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | |||
| 24 | } | 28 | } |
| 25 | 29 | ||
| 26 | /// Gets the shader program code from memory for the specified address | 30 | /// Gets the shader program code from memory for the specified address |
| 27 | static GLShader::ProgramCode GetShaderCode(VAddr addr) { | 31 | static ProgramCode GetShaderCode(VAddr addr) { |
| 28 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | 32 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 29 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); | 33 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |
| 30 | return program_code; | 34 | return program_code; |
| 31 | } | 35 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 768747968..e0887dd7b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/rasterizer_cache.h" | 13 | #include "video_core/rasterizer_cache.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 16 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 16 | 17 | ||
| 17 | namespace OpenGL { | 18 | namespace OpenGL { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 1bb09e61b..3411cf9e6 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2,247 +2,40 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <map> | 5 | #include <array> |
| 6 | #include <optional> | ||
| 7 | #include <set> | ||
| 8 | #include <string> | 6 | #include <string> |
| 9 | #include <string_view> | 7 | #include <string_view> |
| 10 | #include <unordered_set> | 8 | #include <variant> |
| 11 | 9 | ||
| 12 | #include <fmt/format.h> | 10 | #include <fmt/format.h> |
| 13 | 11 | ||
| 12 | #include "common/alignment.h" | ||
| 14 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 15 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/shader_bytecode.h" | 15 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_header.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 16 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 19 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 17 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 20 | 19 | ||
| 21 | namespace OpenGL::GLShader::Decompiler { | 20 | namespace OpenGL::GLShader { |
| 22 | 21 | ||
| 23 | using Tegra::Shader::Attribute; | 22 | using Tegra::Shader::Attribute; |
| 24 | using Tegra::Shader::Instruction; | 23 | using Tegra::Shader::Header; |
| 25 | using Tegra::Shader::LogicOperation; | 24 | using Tegra::Shader::IpaInterpMode; |
| 26 | using Tegra::Shader::OpCode; | 25 | using Tegra::Shader::IpaMode; |
| 26 | using Tegra::Shader::IpaSampleMode; | ||
| 27 | using Tegra::Shader::Register; | 27 | using Tegra::Shader::Register; |
| 28 | using Tegra::Shader::Sampler; | 28 | using namespace VideoCommon::Shader; |
| 29 | using Tegra::Shader::SubOp; | ||
| 30 | 29 | ||
| 31 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | 30 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 32 | constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); | 31 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; |
| 32 | using Operation = const OperationNode&; | ||
| 33 | 33 | ||
| 34 | constexpr u32 MAX_GEOMETRY_BUFFERS = 6; | 34 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; |
| 35 | constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested | 35 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 36 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | ||
| 36 | 37 | ||
| 37 | static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag", | 38 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 38 | "overflow_flag"}; | ||
| 39 | |||
| 40 | enum class InternalFlag : u64 { | ||
| 41 | ZeroFlag = 0, | ||
| 42 | SignFlag = 1, | ||
| 43 | CarryFlag = 2, | ||
| 44 | OverflowFlag = 3, | ||
| 45 | Amount | ||
| 46 | }; | ||
| 47 | |||
| 48 | class DecompileFail : public std::runtime_error { | ||
| 49 | public: | ||
| 50 | using std::runtime_error::runtime_error; | ||
| 51 | }; | ||
| 52 | |||
| 53 | /// Generates code to use for a swizzle operation. | ||
| 54 | static std::string GetSwizzle(u64 elem) { | ||
| 55 | ASSERT(elem <= 3); | ||
| 56 | std::string swizzle = "."; | ||
| 57 | swizzle += "xyzw"[elem]; | ||
| 58 | return swizzle; | ||
| 59 | } | ||
| 60 | |||
| 61 | /// Translate topology | ||
| 62 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 63 | switch (topology) { | ||
| 64 | case Tegra::Shader::OutputTopology::PointList: | ||
| 65 | return "points"; | ||
| 66 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 67 | return "line_strip"; | ||
| 68 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 69 | return "triangle_strip"; | ||
| 70 | default: | ||
| 71 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 72 | return "points"; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | /// Describes the behaviour of code path of a given entry point and a return point. | ||
| 77 | enum class ExitMethod { | ||
| 78 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. | ||
| 79 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 80 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 81 | AlwaysEnd, ///< All code paths reach a END instruction. | ||
| 82 | }; | ||
| 83 | |||
| 84 | /// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. | ||
| 85 | struct Subroutine { | ||
| 86 | /// Generates a name suitable for GLSL source code. | ||
| 87 | std::string GetName() const { | ||
| 88 | return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix; | ||
| 89 | } | ||
| 90 | |||
| 91 | u32 begin; ///< Entry point of the subroutine. | ||
| 92 | u32 end; ///< Return point of the subroutine. | ||
| 93 | const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name | ||
| 94 | ExitMethod exit_method; ///< Exit method of the subroutine. | ||
| 95 | std::set<u32> labels; ///< Addresses refereced by JMP instructions. | ||
| 96 | |||
| 97 | bool operator<(const Subroutine& rhs) const { | ||
| 98 | return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); | ||
| 99 | } | ||
| 100 | }; | ||
| 101 | |||
| 102 | /// Analyzes shader code and produces a set of subroutines. | ||
| 103 | class ControlFlowAnalyzer { | ||
| 104 | public: | ||
| 105 | ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) | ||
| 106 | : program_code(program_code), shader_coverage_begin(main_offset), | ||
| 107 | shader_coverage_end(main_offset + 1) { | ||
| 108 | |||
| 109 | // Recursively finds all subroutines. | ||
| 110 | const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); | ||
| 111 | if (program_main.exit_method != ExitMethod::AlwaysEnd) | ||
| 112 | throw DecompileFail("Program does not always end"); | ||
| 113 | } | ||
| 114 | |||
| 115 | std::set<Subroutine> GetSubroutines() { | ||
| 116 | return std::move(subroutines); | ||
| 117 | } | ||
| 118 | |||
| 119 | std::size_t GetShaderLength() const { | ||
| 120 | return shader_coverage_end * sizeof(u64); | ||
| 121 | } | ||
| 122 | |||
| 123 | private: | ||
| 124 | const ProgramCode& program_code; | ||
| 125 | std::set<Subroutine> subroutines; | ||
| 126 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 127 | u32 shader_coverage_begin; | ||
| 128 | u32 shader_coverage_end; | ||
| 129 | |||
| 130 | /// Adds and analyzes a new subroutine if it is not added yet. | ||
| 131 | const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { | ||
| 132 | Subroutine subroutine{begin, end, suffix, ExitMethod::Undetermined, {}}; | ||
| 133 | |||
| 134 | const auto iter = subroutines.find(subroutine); | ||
| 135 | if (iter != subroutines.end()) { | ||
| 136 | return *iter; | ||
| 137 | } | ||
| 138 | |||
| 139 | subroutine.exit_method = Scan(begin, end, subroutine.labels); | ||
| 140 | if (subroutine.exit_method == ExitMethod::Undetermined) { | ||
| 141 | throw DecompileFail("Recursive function detected"); | ||
| 142 | } | ||
| 143 | |||
| 144 | return *subroutines.insert(std::move(subroutine)).first; | ||
| 145 | } | ||
| 146 | |||
| 147 | /// Merges exit method of two parallel branches. | ||
| 148 | static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 149 | if (a == ExitMethod::Undetermined) { | ||
| 150 | return b; | ||
| 151 | } | ||
| 152 | if (b == ExitMethod::Undetermined) { | ||
| 153 | return a; | ||
| 154 | } | ||
| 155 | if (a == b) { | ||
| 156 | return a; | ||
| 157 | } | ||
| 158 | return ExitMethod::Conditional; | ||
| 159 | } | ||
| 160 | |||
| 161 | /// Scans a range of code for labels and determines the exit method. | ||
| 162 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 163 | const auto [iter, inserted] = | ||
| 164 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 165 | ExitMethod& exit_method = iter->second; | ||
| 166 | if (!inserted) | ||
| 167 | return exit_method; | ||
| 168 | |||
| 169 | for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { | ||
| 170 | shader_coverage_begin = std::min(shader_coverage_begin, offset); | ||
| 171 | shader_coverage_end = std::max(shader_coverage_end, offset + 1); | ||
| 172 | |||
| 173 | const Instruction instr = {program_code[offset]}; | ||
| 174 | if (const auto opcode = OpCode::Decode(instr)) { | ||
| 175 | switch (opcode->get().GetId()) { | ||
| 176 | case OpCode::Id::EXIT: { | ||
| 177 | // The EXIT instruction can be predicated, which means that the shader can | ||
| 178 | // conditionally end on this instruction. We have to consider the case where the | ||
| 179 | // condition is not met and check the exit method of that other basic block. | ||
| 180 | using Tegra::Shader::Pred; | ||
| 181 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 182 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 183 | } else { | ||
| 184 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 185 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | case OpCode::Id::BRA: { | ||
| 189 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 190 | labels.insert(target); | ||
| 191 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||
| 192 | const ExitMethod jmp = Scan(target, end, labels); | ||
| 193 | return exit_method = ParallelExit(no_jmp, jmp); | ||
| 194 | } | ||
| 195 | case OpCode::Id::SSY: | ||
| 196 | case OpCode::Id::PBK: { | ||
| 197 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 198 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 199 | "Constant buffer branching is not supported"); | ||
| 200 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 201 | labels.insert(target); | ||
| 202 | // Continue scanning for an exit method. | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | } | ||
| 208 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 209 | } | ||
| 210 | }; | ||
| 211 | |||
| 212 | template <typename T> | ||
| 213 | class ShaderScopedScope { | ||
| 214 | public: | ||
| 215 | explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) | ||
| 216 | : writer(writer), end_expr(std::move(end_expr)) { | ||
| 217 | |||
| 218 | if (begin_expr.empty()) { | ||
| 219 | writer.AddLine('{'); | ||
| 220 | } else { | ||
| 221 | writer.AddExpression(begin_expr); | ||
| 222 | writer.AddLine(" {"); | ||
| 223 | } | ||
| 224 | ++writer.scope; | ||
| 225 | } | ||
| 226 | |||
| 227 | ShaderScopedScope(const ShaderScopedScope&) = delete; | ||
| 228 | |||
| 229 | ~ShaderScopedScope() { | ||
| 230 | --writer.scope; | ||
| 231 | if (end_expr.empty()) { | ||
| 232 | writer.AddLine('}'); | ||
| 233 | } else { | ||
| 234 | writer.AddExpression("} "); | ||
| 235 | writer.AddExpression(end_expr); | ||
| 236 | writer.AddLine(';'); | ||
| 237 | } | ||
| 238 | } | ||
| 239 | |||
| 240 | ShaderScopedScope& operator=(const ShaderScopedScope&) = delete; | ||
| 241 | |||
| 242 | private: | ||
| 243 | T& writer; | ||
| 244 | std::string end_expr; | ||
| 245 | }; | ||
| 246 | 39 | ||
| 247 | class ShaderWriter { | 40 | class ShaderWriter { |
| 248 | public: | 41 | public: |
| @@ -271,16 +64,17 @@ public: | |||
| 271 | shader_source += '\n'; | 64 | shader_source += '\n'; |
| 272 | } | 65 | } |
| 273 | 66 | ||
| 274 | std::string GetResult() { | 67 | std::string GenerateTemporal() { |
| 275 | return std::move(shader_source); | 68 | std::string temporal = "tmp"; |
| 69 | temporal += std::to_string(temporal_index++); | ||
| 70 | return temporal; | ||
| 276 | } | 71 | } |
| 277 | 72 | ||
| 278 | ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, | 73 | std::string GetResult() { |
| 279 | std::string end_expr = {}) { | 74 | return std::move(shader_source); |
| 280 | return ShaderScopedScope(*this, begin_expr, end_expr); | ||
| 281 | } | 75 | } |
| 282 | 76 | ||
| 283 | int scope = 0; | 77 | s32 scope = 0; |
| 284 | 78 | ||
| 285 | private: | 79 | private: |
| 286 | void AppendIndentation() { | 80 | void AppendIndentation() { |
| @@ -288,3663 +82,1447 @@ private: | |||
| 288 | } | 82 | } |
| 289 | 83 | ||
| 290 | std::string shader_source; | 84 | std::string shader_source; |
| 85 | u32 temporal_index = 1; | ||
| 291 | }; | 86 | }; |
| 292 | 87 | ||
| 293 | /** | 88 | /// Generates code to use for a swizzle operation. |
| 294 | * Represents an emulated shader register, used to track the state of that register for emulation | 89 | static std::string GetSwizzle(u32 elem) { |
| 295 | * with GLSL. At this time, a register can be used as a float or an integer. This class is used for | 90 | ASSERT(elem <= 3); |
| 296 | * bookkeeping within the GLSL program. | 91 | std::string swizzle = "."; |
| 297 | */ | 92 | swizzle += "xyzw"[elem]; |
| 298 | class GLSLRegister { | 93 | return swizzle; |
| 299 | public: | 94 | } |
| 300 | enum class Type { | ||
| 301 | Float, | ||
| 302 | Integer, | ||
| 303 | UnsignedInteger, | ||
| 304 | }; | ||
| 305 | |||
| 306 | GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} | ||
| 307 | 95 | ||
| 308 | /// Gets the GLSL type string for a register | 96 | /// Translate topology |
| 309 | static std::string GetTypeString() { | 97 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 310 | return "float"; | 98 | switch (topology) { |
| 99 | case Tegra::Shader::OutputTopology::PointList: | ||
| 100 | return "points"; | ||
| 101 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 102 | return "line_strip"; | ||
| 103 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 104 | return "triangle_strip"; | ||
| 105 | default: | ||
| 106 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 107 | return "points"; | ||
| 311 | } | 108 | } |
| 109 | } | ||
| 312 | 110 | ||
| 313 | /// Gets the GLSL register prefix string, used for declarations and referencing | 111 | /// Returns true if an object has to be treated as precise |
| 314 | static std::string GetPrefixString() { | 112 | static bool IsPrecise(Operation operand) { |
| 315 | return "reg_"; | 113 | const auto& meta = operand.GetMeta(); |
| 316 | } | ||
| 317 | 114 | ||
| 318 | /// Returns a GLSL string representing the current state of the register | 115 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |
| 319 | std::string GetString() const { | 116 | return arithmetic->precise; |
| 320 | return GetPrefixString() + std::to_string(index) + '_' + suffix; | ||
| 321 | } | 117 | } |
| 322 | 118 | if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { | |
| 323 | /// Returns the index of the register | 119 | return half_arithmetic->precise; |
| 324 | std::size_t GetIndex() const { | ||
| 325 | return index; | ||
| 326 | } | 120 | } |
| 121 | return false; | ||
| 122 | } | ||
| 327 | 123 | ||
| 328 | private: | 124 | static bool IsPrecise(Node node) { |
| 329 | const std::size_t index; | 125 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 330 | const std::string& suffix; | 126 | return IsPrecise(*operation); |
| 331 | }; | ||
| 332 | |||
| 333 | /** | ||
| 334 | * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state | ||
| 335 | * of all registers (e.g. whether they are currently being used as Floats or Integers), and | ||
| 336 | * generates the necessary GLSL code to perform conversions as needed. This class is used for | ||
| 337 | * bookkeeping within the GLSL program. | ||
| 338 | */ | ||
| 339 | class GLSLRegisterManager { | ||
| 340 | public: | ||
| 341 | GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, | ||
| 342 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, | ||
| 343 | const Tegra::Shader::Header& header) | ||
| 344 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, | ||
| 345 | fixed_pipeline_output_attributes_used{}, local_memory_size{0} { | ||
| 346 | BuildRegisterList(); | ||
| 347 | BuildInputList(); | ||
| 348 | } | ||
| 349 | |||
| 350 | void SetConditionalCodesFromExpression(const std::string& expresion) { | ||
| 351 | SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0"); | ||
| 352 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); | ||
| 353 | } | ||
| 354 | |||
| 355 | void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) { | ||
| 356 | SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem))); | ||
| 357 | } | ||
| 358 | |||
| 359 | /** | ||
| 360 | * Returns code that does an integer size conversion for the specified size. | ||
| 361 | * @param value Value to perform integer size conversion on. | ||
| 362 | * @param size Register size to use for conversion instructions. | ||
| 363 | * @returns GLSL string corresponding to the value converted to the specified size. | ||
| 364 | */ | ||
| 365 | static std::string ConvertIntegerSize(const std::string& value, Register::Size size) { | ||
| 366 | switch (size) { | ||
| 367 | case Register::Size::Byte: | ||
| 368 | return "((" + value + " << 24) >> 24)"; | ||
| 369 | case Register::Size::Short: | ||
| 370 | return "((" + value + " << 16) >> 16)"; | ||
| 371 | case Register::Size::Word: | ||
| 372 | // Default - do nothing | ||
| 373 | return value; | ||
| 374 | default: | ||
| 375 | UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); | ||
| 376 | return value; | ||
| 377 | } | ||
| 378 | } | 127 | } |
| 128 | return false; | ||
| 129 | } | ||
| 379 | 130 | ||
| 380 | /** | 131 | class GLSLDecompiler final { |
| 381 | * Gets a register as an float. | 132 | public: |
| 382 | * @param reg The register to get. | 133 | explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix) |
| 383 | * @param elem The element to use for the operation. | 134 | : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 384 | * @returns GLSL string corresponding to the register as a float. | ||
| 385 | */ | ||
| 386 | std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) { | ||
| 387 | return GetRegister(reg, elem); | ||
| 388 | } | ||
| 389 | |||
| 390 | /** | ||
| 391 | * Gets a register as an integer. | ||
| 392 | * @param reg The register to get. | ||
| 393 | * @param elem The element to use for the operation. | ||
| 394 | * @param is_signed Whether to get the register as a signed (or unsigned) integer. | ||
| 395 | * @param size Register size to use for conversion instructions. | ||
| 396 | * @returns GLSL string corresponding to the register as an integer. | ||
| 397 | */ | ||
| 398 | std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true, | ||
| 399 | Register::Size size = Register::Size::Word) { | ||
| 400 | const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"}; | ||
| 401 | const std::string value{func + '(' + GetRegister(reg, elem) + ')'}; | ||
| 402 | return ConvertIntegerSize(value, size); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * Writes code that does a register assignment to float value operation. | ||
| 407 | * @param reg The destination register to use. | ||
| 408 | * @param elem The element to use for the operation. | ||
| 409 | * @param value The code representing the value to assign. | ||
| 410 | * @param dest_num_components Number of components in the destination. | ||
| 411 | * @param value_num_components Number of components in the value. | ||
| 412 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 413 | * @param sets_cc Optional, when True, sets the corresponding values to the implemented | ||
| 414 | * condition flags. | ||
| 415 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 416 | */ | ||
| 417 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 418 | u64 dest_num_components, u64 value_num_components, | ||
| 419 | bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0, | ||
| 420 | bool precise = false) { | ||
| 421 | const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value; | ||
| 422 | SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem, | ||
| 423 | precise); | ||
| 424 | if (sets_cc) { | ||
| 425 | if (reg == Register::ZeroIndex) { | ||
| 426 | SetConditionalCodesFromExpression(clamped_value); | ||
| 427 | } else { | ||
| 428 | SetConditionalCodesFromRegister(reg, dest_elem); | ||
| 429 | } | ||
| 430 | } | ||
| 431 | } | ||
| 432 | 135 | ||
| 433 | /** | 136 | void Decompile() { |
| 434 | * Writes code that does a register assignment to integer value operation. | 137 | DeclareVertex(); |
| 435 | * @param reg The destination register to use. | 138 | DeclareGeometry(); |
| 436 | * @param elem The element to use for the operation. | 139 | DeclareRegisters(); |
| 437 | * @param value The code representing the value to assign. | 140 | DeclarePredicates(); |
| 438 | * @param dest_num_components Number of components in the destination. | 141 | DeclareLocalMemory(); |
| 439 | * @param value_num_components Number of components in the value. | 142 | DeclareInternalFlags(); |
| 440 | * @param is_saturated Optional, when True, saturates the provided value. | 143 | DeclareInputAttributes(); |
| 441 | * @param sets_cc Optional, when True, sets the corresponding values to the implemented | 144 | DeclareOutputAttributes(); |
| 442 | * condition flags. | 145 | DeclareConstantBuffers(); |
| 443 | * @param dest_elem Optional, the destination element to use for the operation. | 146 | DeclareSamplers(); |
| 444 | * @param size Register size to use for conversion instructions. | ||
| 445 | */ | ||
| 446 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, | ||
| 447 | const std::string& value, u64 dest_num_components, | ||
| 448 | u64 value_num_components, bool is_saturated = false, | ||
| 449 | bool sets_cc = false, u64 dest_elem = 0, | ||
| 450 | Register::Size size = Register::Size::Word) { | ||
| 451 | UNIMPLEMENTED_IF(is_saturated); | ||
| 452 | const std::string final_value = ConvertIntegerSize(value, size); | ||
| 453 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 454 | |||
| 455 | SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components, | ||
| 456 | value_num_components, dest_elem, false); | ||
| 457 | |||
| 458 | if (sets_cc) { | ||
| 459 | if (reg == Register::ZeroIndex) { | ||
| 460 | SetConditionalCodesFromExpression(final_value); | ||
| 461 | } else { | ||
| 462 | SetConditionalCodesFromRegister(reg, dest_elem); | ||
| 463 | } | ||
| 464 | } | ||
| 465 | } | ||
| 466 | 147 | ||
| 467 | /** | 148 | code.AddLine("void execute_" + suffix + "() {"); |
| 468 | * Writes code that does a register assignment to a half float value operation. | 149 | ++code.scope; |
| 469 | * @param reg The destination register to use. | ||
| 470 | * @param elem The element to use for the operation. | ||
| 471 | * @param value The code representing the value to assign. Type has to be half float. | ||
| 472 | * @param merge Half float kind of assignment. | ||
| 473 | * @param dest_num_components Number of components in the destination. | ||
| 474 | * @param value_num_components Number of components in the value. | ||
| 475 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 476 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 477 | */ | ||
| 478 | void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 479 | Tegra::Shader::HalfMerge merge, u64 dest_num_components, | ||
| 480 | u64 value_num_components, bool is_saturated = false, | ||
| 481 | u64 dest_elem = 0) { | ||
| 482 | UNIMPLEMENTED_IF(is_saturated); | ||
| 483 | |||
| 484 | const std::string result = [&]() { | ||
| 485 | switch (merge) { | ||
| 486 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 487 | return "uintBitsToFloat(packHalf2x16(" + value + "))"; | ||
| 488 | case Tegra::Shader::HalfMerge::F32: | ||
| 489 | // Half float instructions take the first component when doing a float cast. | ||
| 490 | return "float(" + value + ".x)"; | ||
| 491 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 492 | // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the | ||
| 493 | // pack. I couldn't test this on hardware but it shouldn't really matter since most | ||
| 494 | // of the time when a Mrg_* flag is used both components will be mirrored. That | ||
| 495 | // being said, it deserves a test. | ||
| 496 | return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 497 | " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; | ||
| 498 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 499 | return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 500 | " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; | ||
| 501 | default: | ||
| 502 | UNREACHABLE(); | ||
| 503 | return std::string("0"); | ||
| 504 | } | ||
| 505 | }(); | ||
| 506 | 150 | ||
| 507 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); | 151 | // VM's program counter |
| 508 | } | 152 | const auto first_address = ir.GetBasicBlocks().begin()->first; |
| 153 | code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;"); | ||
| 509 | 154 | ||
| 510 | /** | 155 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 511 | * Writes code that does a register assignment to input attribute operation. Input attributes | 156 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 512 | * are stored as floats, so this may require conversion. | 157 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 513 | * @param reg The destination register to use. | 158 | code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE)); |
| 514 | * @param elem The element to use for the operation. | 159 | code.AddLine("uint flow_stack_top = 0u;"); |
| 515 | * @param attribute The input attribute to use as the source value. | ||
| 516 | * @param input_mode The input mode. | ||
| 517 | * @param vertex The register that decides which vertex to read from (used in GS). | ||
| 518 | */ | ||
| 519 | void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute, | ||
| 520 | const Tegra::Shader::IpaMode& input_mode, | ||
| 521 | std::optional<Register> vertex = {}) { | ||
| 522 | const std::string dest = GetRegisterAsFloat(reg); | ||
| 523 | const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem); | ||
| 524 | shader.AddLine(dest + " = " + src + ';'); | ||
| 525 | } | ||
| 526 | 160 | ||
| 527 | std::string GetLocalMemoryAsFloat(const std::string& index) { | 161 | code.AddLine("while (true) {"); |
| 528 | return "lmem[" + index + ']'; | 162 | ++code.scope; |
| 529 | } | ||
| 530 | 163 | ||
| 531 | std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) { | 164 | code.AddLine("switch (jmp_to) {"); |
| 532 | const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"}; | ||
| 533 | return func + "(lmem[" + index + "])"; | ||
| 534 | } | ||
| 535 | 165 | ||
| 536 | void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { | 166 | for (const auto& pair : ir.GetBasicBlocks()) { |
| 537 | shader.AddLine("lmem[" + index + "] = " + value + ';'); | 167 | const auto [address, bb] = pair; |
| 538 | } | 168 | code.AddLine(fmt::format("case 0x{:x}u: {{", address)); |
| 169 | ++code.scope; | ||
| 539 | 170 | ||
| 540 | void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, | 171 | VisitBasicBlock(bb); |
| 541 | bool is_signed = false) { | ||
| 542 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 543 | shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); | ||
| 544 | } | ||
| 545 | 172 | ||
| 546 | std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const { | 173 | --code.scope; |
| 547 | switch (cc) { | 174 | code.AddLine('}'); |
| 548 | case Tegra::Shader::ConditionCode::NEU: | ||
| 549 | return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; | ||
| 550 | default: | ||
| 551 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); | ||
| 552 | return "false"; | ||
| 553 | } | 175 | } |
| 554 | } | ||
| 555 | |||
| 556 | std::string GetInternalFlag(const InternalFlag flag) const { | ||
| 557 | const auto index = static_cast<u32>(flag); | ||
| 558 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | ||
| 559 | |||
| 560 | return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix; | ||
| 561 | } | ||
| 562 | 176 | ||
| 563 | void SetInternalFlag(const InternalFlag flag, const std::string& value) const { | 177 | code.AddLine("default: return;"); |
| 564 | shader.AddLine(GetInternalFlag(flag) + " = " + value + ';'); | 178 | code.AddLine('}'); |
| 565 | } | ||
| 566 | |||
| 567 | /** | ||
| 568 | * Writes code that does a output attribute assignment to register operation. Output attributes | ||
| 569 | * are stored as floats, so this may require conversion. | ||
| 570 | * @param attribute The destination output attribute. | ||
| 571 | * @param elem The element to use for the operation. | ||
| 572 | * @param val_reg The register to use as the source value. | ||
| 573 | * @param buf_reg The register that tells which buffer to write to (used in geometry shaders). | ||
| 574 | */ | ||
| 575 | void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg, | ||
| 576 | const Register& buf_reg) { | ||
| 577 | const std::string dest = GetOutputAttribute(attribute); | ||
| 578 | const std::string src = GetRegisterAsFloat(val_reg); | ||
| 579 | if (dest.empty()) | ||
| 580 | return; | ||
| 581 | |||
| 582 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the | ||
| 583 | // instruction for now. | ||
| 584 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | ||
| 585 | // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry | ||
| 586 | // shader. These instructions use a dirty register as buffer index, to avoid some | ||
| 587 | // drivers from complaining about out of boundary writes, guard them. | ||
| 588 | const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + | ||
| 589 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; | ||
| 590 | shader.AddLine("amem[" + buf_index + "][" + | ||
| 591 | std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) + | ||
| 592 | " = " + src + ';'); | ||
| 593 | return; | ||
| 594 | } | ||
| 595 | 179 | ||
| 596 | switch (attribute) { | 180 | for (std::size_t i = 0; i < 2; ++i) { |
| 597 | case Attribute::Index::ClipDistances0123: | 181 | --code.scope; |
| 598 | case Attribute::Index::ClipDistances4567: { | 182 | code.AddLine('}'); |
| 599 | const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem; | ||
| 600 | UNIMPLEMENTED_IF_MSG( | ||
| 601 | ((header.vtg.clip_distances >> index) & 1) == 0, | ||
| 602 | "Shader is setting gl_ClipDistance{} without enabling it in the header", index); | ||
| 603 | |||
| 604 | clip_distances[index] = true; | ||
| 605 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 606 | shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';'); | ||
| 607 | break; | ||
| 608 | } | ||
| 609 | case Attribute::Index::PointSize: | ||
| 610 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 611 | shader.AddLine(dest + " = " + src + ';'); | ||
| 612 | break; | ||
| 613 | default: | ||
| 614 | shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); | ||
| 615 | break; | ||
| 616 | } | 183 | } |
| 617 | } | 184 | } |
| 618 | 185 | ||
| 619 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. | 186 | std::string GetResult() { |
| 620 | std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, | 187 | return code.GetResult(); |
| 621 | Register::Size size = Register::Size::Word) { | ||
| 622 | declr_const_buffers[index].MarkAsUsed(index, offset, stage); | ||
| 623 | std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + | ||
| 624 | std::to_string(offset % 4) + ']'; | ||
| 625 | |||
| 626 | if (type == GLSLRegister::Type::Float) { | ||
| 627 | // Do nothing, default | ||
| 628 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 629 | value = "floatBitsToInt(" + value + ')'; | ||
| 630 | } else if (type == GLSLRegister::Type::UnsignedInteger) { | ||
| 631 | value = "floatBitsToUint(" + value + ')'; | ||
| 632 | } else { | ||
| 633 | UNREACHABLE(); | ||
| 634 | } | ||
| 635 | |||
| 636 | return ConvertIntegerSize(value, size); | ||
| 637 | } | 188 | } |
| 638 | 189 | ||
| 639 | std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, | 190 | ShaderEntries GetShaderEntries() const { |
| 640 | GLSLRegister::Type type) { | 191 | ShaderEntries entries; |
| 641 | declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); | 192 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 642 | 193 | ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first); | |
| 643 | const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); | 194 | entries.const_buffers.push_back(desc); |
| 644 | const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + | ||
| 645 | final_offset + " % 4]"; | ||
| 646 | |||
| 647 | if (type == GLSLRegister::Type::Float) { | ||
| 648 | return value; | ||
| 649 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 650 | return "floatBitsToInt(" + value + ')'; | ||
| 651 | } else { | ||
| 652 | UNREACHABLE(); | ||
| 653 | return value; | ||
| 654 | } | 195 | } |
| 655 | } | 196 | for (const auto& sampler : ir.GetSamplers()) { |
| 656 | 197 | SamplerEntry desc(sampler, stage, GetSampler(sampler)); | |
| 657 | /// Add declarations. | 198 | entries.samplers.push_back(desc); |
| 658 | void GenerateDeclarations(const std::string& suffix) { | ||
| 659 | GenerateVertex(); | ||
| 660 | GenerateRegisters(suffix); | ||
| 661 | GenerateLocalMemory(); | ||
| 662 | GenerateInternalFlags(); | ||
| 663 | GenerateInputAttrs(); | ||
| 664 | GenerateOutputAttrs(); | ||
| 665 | GenerateConstBuffers(); | ||
| 666 | GenerateSamplers(); | ||
| 667 | GenerateGeometry(); | ||
| 668 | } | ||
| 669 | |||
| 670 | /// Returns a list of constant buffer declarations. | ||
| 671 | std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const { | ||
| 672 | std::vector<ConstBufferEntry> result; | ||
| 673 | std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(), | ||
| 674 | std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); }); | ||
| 675 | return result; | ||
| 676 | } | ||
| 677 | |||
| 678 | /// Returns a list of samplers used in the shader. | ||
| 679 | const std::vector<SamplerEntry>& GetSamplers() const { | ||
| 680 | return used_samplers; | ||
| 681 | } | ||
| 682 | |||
| 683 | /// Returns an array of the used clip distances. | ||
| 684 | const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const { | ||
| 685 | return clip_distances; | ||
| 686 | } | ||
| 687 | |||
| 688 | /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if | ||
| 689 | /// necessary. | ||
| 690 | std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, | ||
| 691 | bool is_array, bool is_shadow) { | ||
| 692 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 693 | |||
| 694 | // If this sampler has already been used, return the existing mapping. | ||
| 695 | const auto itr = | ||
| 696 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 697 | [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); | ||
| 698 | |||
| 699 | if (itr != used_samplers.end()) { | ||
| 700 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 701 | itr->IsShadow() == is_shadow); | ||
| 702 | return itr->GetName(); | ||
| 703 | } | 199 | } |
| 704 | 200 | entries.clip_distances = ir.GetClipDistances(); | |
| 705 | // Otherwise create a new mapping for this sampler | 201 | entries.shader_length = ir.GetLength(); |
| 706 | const std::size_t next_index = used_samplers.size(); | 202 | return entries; |
| 707 | const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow}; | ||
| 708 | used_samplers.emplace_back(entry); | ||
| 709 | return entry.GetName(); | ||
| 710 | } | ||
| 711 | |||
| 712 | void SetLocalMemory(u64 lmem) { | ||
| 713 | local_memory_size = lmem; | ||
| 714 | } | 203 | } |
| 715 | 204 | ||
| 716 | private: | 205 | private: |
| 717 | /// Generates declarations for registers. | 206 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); |
| 718 | void GenerateRegisters(const std::string& suffix) { | 207 | using OperationDecompilersArray = |
| 719 | for (const auto& reg : regs) { | 208 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; |
| 720 | declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() + | ||
| 721 | std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;"); | ||
| 722 | } | ||
| 723 | declarations.AddNewLine(); | ||
| 724 | } | ||
| 725 | |||
| 726 | /// Generates declarations for local memory. | ||
| 727 | void GenerateLocalMemory() { | ||
| 728 | if (local_memory_size > 0) { | ||
| 729 | declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + | ||
| 730 | "];"); | ||
| 731 | declarations.AddNewLine(); | ||
| 732 | } | ||
| 733 | } | ||
| 734 | |||
| 735 | /// Generates declarations for internal flags. | ||
| 736 | void GenerateInternalFlags() { | ||
| 737 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||
| 738 | const InternalFlag code = static_cast<InternalFlag>(flag); | ||
| 739 | declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); | ||
| 740 | } | ||
| 741 | declarations.AddNewLine(); | ||
| 742 | } | ||
| 743 | |||
| 744 | /// Generates declarations for input attributes. | ||
| 745 | void GenerateInputAttrs() { | ||
| 746 | for (const auto element : declr_input_attribute) { | ||
| 747 | // TODO(bunnei): Use proper number of elements for these | ||
| 748 | u32 idx = | ||
| 749 | static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 750 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) { | ||
| 751 | // If inputs are varyings, add an offset | ||
| 752 | idx += GENERIC_VARYING_START_LOCATION; | ||
| 753 | } | ||
| 754 | |||
| 755 | std::string attr{GetInputAttribute(element.first, element.second)}; | ||
| 756 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | ||
| 757 | attr = "gs_" + attr + "[]"; | ||
| 758 | } | ||
| 759 | declarations.AddLine("layout (location = " + std::to_string(idx) + ") " + | ||
| 760 | GetInputFlags(element.first) + "in vec4 " + attr + ';'); | ||
| 761 | } | ||
| 762 | |||
| 763 | declarations.AddNewLine(); | ||
| 764 | } | ||
| 765 | 209 | ||
| 766 | /// Generates declarations for output attributes. | 210 | void DeclareVertex() { |
| 767 | void GenerateOutputAttrs() { | 211 | if (stage != ShaderStage::Vertex) |
| 768 | for (const auto& index : declr_output_attribute) { | 212 | return; |
| 769 | // TODO(bunnei): Use proper number of elements for these | ||
| 770 | const u32 idx = static_cast<u32>(index) - | ||
| 771 | static_cast<u32>(Attribute::Index::Attribute_0) + | ||
| 772 | GENERIC_VARYING_START_LOCATION; | ||
| 773 | declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + | ||
| 774 | GetOutputAttribute(index) + ';'); | ||
| 775 | } | ||
| 776 | declarations.AddNewLine(); | ||
| 777 | } | ||
| 778 | |||
| 779 | /// Generates declarations for constant buffers. | ||
| 780 | void GenerateConstBuffers() { | ||
| 781 | for (const auto& entry : GetConstBuffersDeclarations()) { | ||
| 782 | declarations.AddLine("layout (std140) uniform " + entry.GetName()); | ||
| 783 | declarations.AddLine('{'); | ||
| 784 | declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) + | ||
| 785 | "[MAX_CONSTBUFFER_ELEMENTS];"); | ||
| 786 | declarations.AddLine("};"); | ||
| 787 | declarations.AddNewLine(); | ||
| 788 | } | ||
| 789 | declarations.AddNewLine(); | ||
| 790 | } | ||
| 791 | 213 | ||
| 792 | /// Generates declarations for samplers. | 214 | DeclareVertexRedeclarations(); |
| 793 | void GenerateSamplers() { | ||
| 794 | const auto& samplers = GetSamplers(); | ||
| 795 | for (const auto& sampler : samplers) { | ||
| 796 | declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() + | ||
| 797 | ';'); | ||
| 798 | } | ||
| 799 | declarations.AddNewLine(); | ||
| 800 | } | 215 | } |
| 801 | 216 | ||
| 802 | /// Generates declarations used for geometry shaders. | 217 | void DeclareGeometry() { |
| 803 | void GenerateGeometry() { | 218 | if (stage != ShaderStage::Geometry) |
| 804 | if (stage != Maxwell3D::Regs::ShaderStage::Geometry) | ||
| 805 | return; | 219 | return; |
| 806 | 220 | ||
| 807 | declarations.AddLine( | 221 | const auto topology = GetTopologyName(header.common3.output_topology); |
| 808 | "layout (" + GetTopologyName(header.common3.output_topology) + | 222 | const auto max_vertices = std::to_string(header.common4.max_output_vertices); |
| 809 | ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;"); | 223 | code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;"); |
| 810 | declarations.AddNewLine(); | 224 | code.AddNewLine(); |
| 811 | |||
| 812 | declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" + | ||
| 813 | std::to_string(MAX_ATTRIBUTES) + "];"); | ||
| 814 | declarations.AddNewLine(); | ||
| 815 | |||
| 816 | constexpr char buffer[] = "amem[output_buffer]"; | ||
| 817 | declarations.AddLine("void emit_vertex(uint output_buffer) {"); | ||
| 818 | ++declarations.scope; | ||
| 819 | for (const auto element : declr_output_attribute) { | ||
| 820 | declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' + | ||
| 821 | std::to_string(static_cast<u32>(element)) + "];"); | ||
| 822 | } | ||
| 823 | |||
| 824 | declarations.AddLine("position = " + std::string(buffer) + '[' + | ||
| 825 | std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];"); | ||
| 826 | 225 | ||
| 827 | // If a geometry shader is attached, it will always flip (it's the last stage before | 226 | DeclareVertexRedeclarations(); |
| 828 | // fragment). For more info about flipping, refer to gl_shader_gen.cpp. | ||
| 829 | declarations.AddLine("position.xy *= viewport_flip.xy;"); | ||
| 830 | declarations.AddLine("gl_Position = position;"); | ||
| 831 | declarations.AddLine("position.w = 1.0;"); | ||
| 832 | declarations.AddLine("EmitVertex();"); | ||
| 833 | --declarations.scope; | ||
| 834 | declarations.AddLine('}'); | ||
| 835 | declarations.AddNewLine(); | ||
| 836 | } | 227 | } |
| 837 | 228 | ||
| 838 | void GenerateVertex() { | 229 | void DeclareVertexRedeclarations() { |
| 839 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) | ||
| 840 | return; | ||
| 841 | bool clip_distances_declared = false; | 230 | bool clip_distances_declared = false; |
| 842 | 231 | ||
| 843 | declarations.AddLine("out gl_PerVertex {"); | 232 | code.AddLine("out gl_PerVertex {"); |
| 844 | ++declarations.scope; | 233 | ++code.scope; |
| 845 | declarations.AddLine("vec4 gl_Position;"); | 234 | |
| 846 | for (auto& o : fixed_pipeline_output_attributes_used) { | 235 | code.AddLine("vec4 gl_Position;"); |
| 236 | |||
| 237 | for (const auto o : ir.GetOutputAttributes()) { | ||
| 847 | if (o == Attribute::Index::PointSize) | 238 | if (o == Attribute::Index::PointSize) |
| 848 | declarations.AddLine("float gl_PointSize;"); | 239 | code.AddLine("float gl_PointSize;"); |
| 849 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | 240 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || |
| 850 | o == Attribute::Index::ClipDistances4567)) { | 241 | o == Attribute::Index::ClipDistances4567)) { |
| 851 | declarations.AddLine("float gl_ClipDistance[];"); | 242 | code.AddLine("float gl_ClipDistance[];"); |
| 852 | clip_distances_declared = true; | 243 | clip_distances_declared = true; |
| 853 | } | 244 | } |
| 854 | } | 245 | } |
| 855 | --declarations.scope; | ||
| 856 | declarations.AddLine("};"); | ||
| 857 | } | ||
| 858 | |||
| 859 | /// Generates code representing a temporary (GPR) register. | ||
| 860 | std::string GetRegister(const Register& reg, unsigned elem) { | ||
| 861 | if (reg == Register::ZeroIndex) { | ||
| 862 | return "0"; | ||
| 863 | } | ||
| 864 | |||
| 865 | return regs[reg.GetSwizzledIndex(elem)].GetString(); | ||
| 866 | } | ||
| 867 | |||
| 868 | /** | ||
| 869 | * Writes code that does a register assignment to value operation. | ||
| 870 | * @param reg The destination register to use. | ||
| 871 | * @param elem The element to use for the operation. | ||
| 872 | * @param value The code representing the value to assign. | ||
| 873 | * @param dest_num_components Number of components in the destination. | ||
| 874 | * @param value_num_components Number of components in the value. | ||
| 875 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 876 | */ | ||
| 877 | void SetRegister(const Register& reg, u64 elem, const std::string& value, | ||
| 878 | u64 dest_num_components, u64 value_num_components, u64 dest_elem, | ||
| 879 | bool precise) { | ||
| 880 | if (reg == Register::ZeroIndex) { | ||
| 881 | // Setting RZ is a nop in hardware. | ||
| 882 | return; | ||
| 883 | } | ||
| 884 | |||
| 885 | std::string dest = GetRegister(reg, static_cast<u32>(dest_elem)); | ||
| 886 | if (dest_num_components > 1) { | ||
| 887 | dest += GetSwizzle(elem); | ||
| 888 | } | ||
| 889 | 246 | ||
| 890 | std::string src = '(' + value + ')'; | 247 | --code.scope; |
| 891 | if (value_num_components > 1) { | 248 | code.AddLine("};"); |
| 892 | src += GetSwizzle(elem); | 249 | code.AddNewLine(); |
| 893 | } | 250 | } |
| 894 | |||
| 895 | if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 896 | const auto scope = shader.Scope(); | ||
| 897 | 251 | ||
| 898 | // This avoids optimizations of constant propagation and keeps the code as the original | 252 | void DeclareRegisters() { |
| 899 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | 253 | const auto& registers = ir.GetRegisters(); |
| 900 | shader.AddLine("precise float tmp = " + src + ';'); | 254 | for (const u32 gpr : registers) { |
| 901 | shader.AddLine(dest + " = tmp;"); | 255 | code.AddLine("float " + GetRegister(gpr) + " = 0;"); |
| 902 | } else { | ||
| 903 | shader.AddLine(dest + " = " + src + ';'); | ||
| 904 | } | 256 | } |
| 257 | if (!registers.empty()) | ||
| 258 | code.AddNewLine(); | ||
| 905 | } | 259 | } |
| 906 | 260 | ||
| 907 | /// Build the GLSL register list. | 261 | void DeclarePredicates() { |
| 908 | void BuildRegisterList() { | 262 | const auto& predicates = ir.GetPredicates(); |
| 909 | regs.reserve(Register::NumRegisters); | 263 | for (const auto pred : predicates) { |
| 910 | 264 | code.AddLine("bool " + GetPredicate(pred) + " = false;"); | |
| 911 | for (std::size_t index = 0; index < Register::NumRegisters; ++index) { | ||
| 912 | regs.emplace_back(index, suffix); | ||
| 913 | } | 265 | } |
| 266 | if (!predicates.empty()) | ||
| 267 | code.AddNewLine(); | ||
| 914 | } | 268 | } |
| 915 | 269 | ||
| 916 | void BuildInputList() { | 270 | void DeclareLocalMemory() { |
| 917 | const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) - | 271 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { |
| 918 | static_cast<u32>(Attribute::Index::Attribute_0) + 1; | 272 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 919 | declr_input_attribute.reserve(size); | 273 | code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];"); |
| 274 | code.AddNewLine(); | ||
| 275 | } | ||
| 920 | } | 276 | } |
| 921 | 277 | ||
| 922 | /// Generates code representing an input attribute register. | 278 | void DeclareInternalFlags() { |
| 923 | std::string GetInputAttribute(Attribute::Index attribute, | 279 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { |
| 924 | const Tegra::Shader::IpaMode& input_mode, | 280 | const InternalFlag flag_code = static_cast<InternalFlag>(flag); |
| 925 | std::optional<Register> vertex = {}) { | 281 | code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;"); |
| 926 | auto GeometryPass = [&](const std::string& name) { | ||
| 927 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) { | ||
| 928 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set | ||
| 929 | // an 0x80000000 index for those and the shader fails to build. Find out why this | ||
| 930 | // happens and what's its intent. | ||
| 931 | return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) + | ||
| 932 | " % MAX_VERTEX_INPUT]"; | ||
| 933 | } | ||
| 934 | return name; | ||
| 935 | }; | ||
| 936 | |||
| 937 | switch (attribute) { | ||
| 938 | case Attribute::Index::Position: | ||
| 939 | if (stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 940 | return GeometryPass("position"); | ||
| 941 | } else { | ||
| 942 | return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)"; | ||
| 943 | } | ||
| 944 | case Attribute::Index::PointCoord: | ||
| 945 | return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)"; | ||
| 946 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 947 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 948 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 949 | // shader. | ||
| 950 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); | ||
| 951 | // Config pack's first value is instance_id. | ||
| 952 | return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))"; | ||
| 953 | case Attribute::Index::FrontFacing: | ||
| 954 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 955 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||
| 956 | return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))"; | ||
| 957 | default: | ||
| 958 | const u32 index{static_cast<u32>(attribute) - | ||
| 959 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 960 | if (attribute >= Attribute::Index::Attribute_0 && | ||
| 961 | attribute <= Attribute::Index::Attribute_31) { | ||
| 962 | if (declr_input_attribute.count(attribute) == 0) { | ||
| 963 | declr_input_attribute[attribute] = input_mode; | ||
| 964 | } else { | ||
| 965 | UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode, | ||
| 966 | "Multiple input modes for the same attribute"); | ||
| 967 | } | ||
| 968 | return GeometryPass("input_attribute_" + std::to_string(index)); | ||
| 969 | } | ||
| 970 | |||
| 971 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | ||
| 972 | } | 282 | } |
| 973 | 283 | code.AddNewLine(); | |
| 974 | return "vec4(0, 0, 0, 0)"; | ||
| 975 | } | 284 | } |
| 976 | 285 | ||
| 977 | std::string GetInputFlags(const Attribute::Index attribute) { | 286 | std::string GetInputFlags(const IpaMode& input_mode) { |
| 978 | const Tegra::Shader::IpaSampleMode sample_mode = | 287 | const IpaSampleMode sample_mode = input_mode.sampling_mode; |
| 979 | declr_input_attribute[attribute].sampling_mode; | 288 | const IpaInterpMode interp_mode = input_mode.interpolation_mode; |
| 980 | const Tegra::Shader::IpaInterpMode interp_mode = | ||
| 981 | declr_input_attribute[attribute].interpolation_mode; | ||
| 982 | std::string out; | 289 | std::string out; |
| 290 | |||
| 983 | switch (interp_mode) { | 291 | switch (interp_mode) { |
| 984 | case Tegra::Shader::IpaInterpMode::Flat: { | 292 | case IpaInterpMode::Flat: |
| 985 | out += "flat "; | 293 | out += "flat "; |
| 986 | break; | 294 | break; |
| 987 | } | 295 | case IpaInterpMode::Linear: |
| 988 | case Tegra::Shader::IpaInterpMode::Linear: { | ||
| 989 | out += "noperspective "; | 296 | out += "noperspective "; |
| 990 | break; | 297 | break; |
| 991 | } | 298 | case IpaInterpMode::Perspective: |
| 992 | case Tegra::Shader::IpaInterpMode::Perspective: { | ||
| 993 | // Default, Smooth | 299 | // Default, Smooth |
| 994 | break; | 300 | break; |
| 995 | } | 301 | default: |
| 996 | default: { | ||
| 997 | UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); | 302 | UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); |
| 998 | } | 303 | } |
| 999 | } | ||
| 1000 | switch (sample_mode) { | 304 | switch (sample_mode) { |
| 1001 | case Tegra::Shader::IpaSampleMode::Centroid: | 305 | case IpaSampleMode::Centroid: |
| 1002 | // It can be implemented with the "centroid " keyword in glsl | 306 | // It can be implemented with the "centroid " keyword in GLSL |
| 1003 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); | 307 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); |
| 1004 | break; | 308 | break; |
| 1005 | case Tegra::Shader::IpaSampleMode::Default: | 309 | case IpaSampleMode::Default: |
| 1006 | // Default, n/a | 310 | // Default, n/a |
| 1007 | break; | 311 | break; |
| 1008 | default: { | 312 | default: |
| 1009 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); | 313 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); |
| 1010 | break; | ||
| 1011 | } | ||
| 1012 | } | 314 | } |
| 1013 | return out; | 315 | return out; |
| 1014 | } | 316 | } |
| 1015 | 317 | ||
| 1016 | /// Generates code representing the declaration name of an output attribute register. | 318 | void DeclareInputAttributes() { |
| 1017 | std::string GetOutputAttribute(Attribute::Index attribute) { | 319 | const auto& attributes = ir.GetInputAttributes(); |
| 1018 | switch (attribute) { | 320 | for (const auto element : attributes) { |
| 1019 | case Attribute::Index::PointSize: | 321 | const Attribute::Index index = element.first; |
| 1020 | return "gl_PointSize"; | 322 | const IpaMode& input_mode = *element.second.begin(); |
| 1021 | case Attribute::Index::Position: | 323 | if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { |
| 1022 | return "position"; | 324 | // Skip when it's not a generic attribute |
| 1023 | case Attribute::Index::ClipDistances0123: | 325 | continue; |
| 1024 | case Attribute::Index::ClipDistances4567: { | ||
| 1025 | return "gl_ClipDistance"; | ||
| 1026 | } | ||
| 1027 | default: | ||
| 1028 | const u32 index{static_cast<u32>(attribute) - | ||
| 1029 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 1030 | if (attribute >= Attribute::Index::Attribute_0) { | ||
| 1031 | declr_output_attribute.insert(attribute); | ||
| 1032 | return "output_attribute_" + std::to_string(index); | ||
| 1033 | } | 326 | } |
| 1034 | 327 | ||
| 1035 | UNIMPLEMENTED_MSG("Unhandled output attribute={}", index); | 328 | ASSERT(element.second.size() > 0); |
| 1036 | return {}; | 329 | UNIMPLEMENTED_IF_MSG(element.second.size() > 1, |
| 1037 | } | 330 | "Multiple input flag modes are not supported in GLSL"); |
| 1038 | } | ||
| 1039 | |||
| 1040 | ShaderWriter& shader; | ||
| 1041 | ShaderWriter& declarations; | ||
| 1042 | std::vector<GLSLRegister> regs; | ||
| 1043 | std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute; | ||
| 1044 | std::set<Attribute::Index> declr_output_attribute; | ||
| 1045 | std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; | ||
| 1046 | std::vector<SamplerEntry> used_samplers; | ||
| 1047 | const Maxwell3D::Regs::ShaderStage& stage; | ||
| 1048 | const std::string& suffix; | ||
| 1049 | const Tegra::Shader::Header& header; | ||
| 1050 | std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; | ||
| 1051 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 1052 | u64 local_memory_size; | ||
| 1053 | }; | ||
| 1054 | |||
| 1055 | class GLSLGenerator { | ||
| 1056 | public: | ||
| 1057 | GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, | ||
| 1058 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix, | ||
| 1059 | std::size_t shader_length) | ||
| 1060 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | ||
| 1061 | stage(stage), suffix(suffix), shader_length(shader_length) { | ||
| 1062 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 1063 | local_memory_size = header.GetLocalMemorySize(); | ||
| 1064 | regs.SetLocalMemory(local_memory_size); | ||
| 1065 | Generate(suffix); | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | std::string GetShaderCode() { | ||
| 1069 | return declarations.GetResult() + shader.GetResult(); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | /// Returns entries in the shader that are useful for external functions | ||
| 1073 | ShaderEntries GetEntries() const { | ||
| 1074 | return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(), | ||
| 1075 | shader_length}; | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | private: | ||
| 1079 | /// Gets the Subroutine object corresponding to the specified address. | ||
| 1080 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { | ||
| 1081 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); | ||
| 1082 | ASSERT(iter != subroutines.end()); | ||
| 1083 | return *iter; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | /// Generates code representing a 19-bit immediate value | ||
| 1087 | static std::string GetImmediate19(const Instruction& instr) { | ||
| 1088 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); | ||
| 1089 | } | ||
| 1090 | 331 | ||
| 1091 | /// Generates code representing a 32-bit immediate value | 332 | // TODO(bunnei): Use proper number of elements for these |
| 1092 | static std::string GetImmediate32(const Instruction& instr) { | 333 | u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); |
| 1093 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); | 334 | if (stage != ShaderStage::Vertex) { |
| 1094 | } | 335 | // If inputs are varyings, add an offset |
| 336 | idx += GENERIC_VARYING_START_LOCATION; | ||
| 337 | } | ||
| 1095 | 338 | ||
| 1096 | /// Generates code representing a vec2 pair unpacked from a half float immediate | 339 | std::string attr = GetInputAttribute(index); |
| 1097 | static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) { | 340 | if (stage == ShaderStage::Geometry) { |
| 1098 | const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates())); | 341 | attr = "gs_" + attr + "[]"; |
| 1099 | if (!negate) { | 342 | } |
| 1100 | return immediate; | 343 | code.AddLine("layout (location = " + std::to_string(idx) + ") " + |
| 344 | GetInputFlags(input_mode) + "in vec4 " + attr + ';'); | ||
| 1101 | } | 345 | } |
| 1102 | const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : ""; | 346 | if (!attributes.empty()) |
| 1103 | const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : ""; | 347 | code.AddNewLine(); |
| 1104 | const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)"; | ||
| 1105 | |||
| 1106 | return '(' + immediate + " * " + negate_vec + ')'; | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | /// Generates code representing a texture sampler. | ||
| 1110 | std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, | ||
| 1111 | bool is_shadow) { | ||
| 1112 | return regs.AccessSampler(sampler, type, is_array, is_shadow); | ||
| 1113 | } | 348 | } |
| 1114 | 349 | ||
| 1115 | /** | 350 | void DeclareOutputAttributes() { |
| 1116 | * Adds code that calls a subroutine. | 351 | const auto& attributes = ir.GetOutputAttributes(); |
| 1117 | * @param subroutine the subroutine to call. | 352 | for (const auto index : attributes) { |
| 1118 | */ | 353 | if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { |
| 1119 | void CallSubroutine(const Subroutine& subroutine) { | 354 | // Skip when it's not a generic attribute |
| 1120 | if (subroutine.exit_method == ExitMethod::AlwaysEnd) { | 355 | continue; |
| 1121 | shader.AddLine(subroutine.GetName() + "();"); | 356 | } |
| 1122 | shader.AddLine("return true;"); | 357 | // TODO(bunnei): Use proper number of elements for these |
| 1123 | } else if (subroutine.exit_method == ExitMethod::Conditional) { | 358 | const auto idx = static_cast<u32>(index) - |
| 1124 | shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); | 359 | static_cast<u32>(Attribute::Index::Attribute_0) + |
| 1125 | } else { | 360 | GENERIC_VARYING_START_LOCATION; |
| 1126 | shader.AddLine(subroutine.GetName() + "();"); | 361 | code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + |
| 362 | GetOutputAttribute(index) + ';'); | ||
| 1127 | } | 363 | } |
| 364 | if (!attributes.empty()) | ||
| 365 | code.AddNewLine(); | ||
| 1128 | } | 366 | } |
| 1129 | 367 | ||
| 1130 | /* | 368 | void DeclareConstantBuffers() { |
| 1131 | * Writes code that assigns a predicate boolean variable. | 369 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 1132 | * @param pred The id of the predicate to write to. | 370 | const auto [index, size] = entry; |
| 1133 | * @param value The expression value to assign to the predicate. | 371 | code.AddLine("layout (std140) uniform " + GetConstBufferBlock(index) + " {"); |
| 1134 | */ | 372 | code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); |
| 1135 | void SetPredicate(u64 pred, const std::string& value) { | 373 | code.AddLine("};"); |
| 1136 | using Tegra::Shader::Pred; | 374 | code.AddNewLine(); |
| 1137 | // Can't assign to the constant predicate. | ||
| 1138 | ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); | ||
| 1139 | |||
| 1140 | std::string variable = 'p' + std::to_string(pred) + '_' + suffix; | ||
| 1141 | shader.AddLine(variable + " = " + value + ';'); | ||
| 1142 | declr_predicates.insert(std::move(variable)); | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | /* | ||
| 1146 | * Returns the condition to use in the 'if' for a predicated instruction. | ||
| 1147 | * @param instr Instruction to generate the if condition for. | ||
| 1148 | * @returns string containing the predicate condition. | ||
| 1149 | */ | ||
| 1150 | std::string GetPredicateCondition(u64 index, bool negate) { | ||
| 1151 | using Tegra::Shader::Pred; | ||
| 1152 | std::string variable; | ||
| 1153 | |||
| 1154 | // Index 7 is used as an 'Always True' condition. | ||
| 1155 | if (index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 1156 | variable = "true"; | ||
| 1157 | } else { | ||
| 1158 | variable = 'p' + std::to_string(index) + '_' + suffix; | ||
| 1159 | declr_predicates.insert(variable); | ||
| 1160 | } | 375 | } |
| 1161 | if (negate) { | ||
| 1162 | return "!(" + variable + ')'; | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | return variable; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | /** | ||
| 1169 | * Returns the comparison string to use to compare two values in the 'set' family of | ||
| 1170 | * instructions. | ||
| 1171 | * @param condition The condition used in the 'set'-family instruction. | ||
| 1172 | * @param op_a First operand to use for the comparison. | ||
| 1173 | * @param op_b Second operand to use for the comparison. | ||
| 1174 | * @returns String corresponding to the GLSL operator that matches the desired comparison. | ||
| 1175 | */ | ||
| 1176 | std::string GetPredicateComparison(Tegra::Shader::PredCondition condition, | ||
| 1177 | const std::string& op_a, const std::string& op_b) const { | ||
| 1178 | using Tegra::Shader::PredCondition; | ||
| 1179 | static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { | ||
| 1180 | {PredCondition::LessThan, "<"}, | ||
| 1181 | {PredCondition::Equal, "=="}, | ||
| 1182 | {PredCondition::LessEqual, "<="}, | ||
| 1183 | {PredCondition::GreaterThan, ">"}, | ||
| 1184 | {PredCondition::NotEqual, "!="}, | ||
| 1185 | {PredCondition::GreaterEqual, ">="}, | ||
| 1186 | {PredCondition::LessThanWithNan, "<"}, | ||
| 1187 | {PredCondition::NotEqualWithNan, "!="}, | ||
| 1188 | {PredCondition::LessEqualWithNan, "<="}, | ||
| 1189 | {PredCondition::GreaterThanWithNan, ">"}, | ||
| 1190 | {PredCondition::GreaterEqualWithNan, ">="}}; | ||
| 1191 | |||
| 1192 | const auto& comparison{PredicateComparisonStrings.find(condition)}; | ||
| 1193 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(), | ||
| 1194 | "Unknown predicate comparison operation"); | ||
| 1195 | |||
| 1196 | std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; | ||
| 1197 | if (condition == PredCondition::LessThanWithNan || | ||
| 1198 | condition == PredCondition::NotEqualWithNan || | ||
| 1199 | condition == PredCondition::LessEqualWithNan || | ||
| 1200 | condition == PredCondition::GreaterThanWithNan || | ||
| 1201 | condition == PredCondition::GreaterEqualWithNan) { | ||
| 1202 | predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | return predicate; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | /** | ||
| 1209 | * Returns the operator string to use to combine two predicates in the 'setp' family of | ||
| 1210 | * instructions. | ||
| 1211 | * @params operation The operator used in the 'setp'-family instruction. | ||
| 1212 | * @returns String corresponding to the GLSL operator that matches the desired operator. | ||
| 1213 | */ | ||
| 1214 | std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const { | ||
| 1215 | using Tegra::Shader::PredOperation; | ||
| 1216 | static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = { | ||
| 1217 | {PredOperation::And, "&&"}, | ||
| 1218 | {PredOperation::Or, "||"}, | ||
| 1219 | {PredOperation::Xor, "^^"}, | ||
| 1220 | }; | ||
| 1221 | |||
| 1222 | auto op = PredicateOperationStrings.find(operation); | ||
| 1223 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation"); | ||
| 1224 | return op->second; | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | /** | ||
| 1228 | * Transforms the input string GLSL operand into one that applies the abs() function and negates | ||
| 1229 | * the output if necessary. When both abs and neg are true, the negation will be applied after | ||
| 1230 | * taking the absolute value. | ||
| 1231 | * @param operand The input operand to take the abs() of, negate, or both. | ||
| 1232 | * @param abs Whether to apply the abs() function to the input operand. | ||
| 1233 | * @param neg Whether to negate the input operand. | ||
| 1234 | * @returns String corresponding to the operand after being transformed by the abs() and | ||
| 1235 | * negation operations. | ||
| 1236 | */ | ||
| 1237 | static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) { | ||
| 1238 | std::string result = operand; | ||
| 1239 | |||
| 1240 | if (abs) { | ||
| 1241 | result = "abs(" + result + ')'; | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | if (neg) { | ||
| 1245 | result = "-(" + result + ')'; | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | return result; | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | /* | ||
| 1252 | * Transforms the input string GLSL operand into an unpacked half float pair. | ||
| 1253 | * @note This function returns a float type pair instead of a half float pair. This is because | ||
| 1254 | * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is. | ||
| 1255 | * @param operand Input operand. It has to be an unsigned integer. | ||
| 1256 | * @param type How to unpack the unsigned integer to a half float pair. | ||
| 1257 | * @param abs Get the absolute value of unpacked half floats. | ||
| 1258 | * @param neg Get the negative value of unpacked half floats. | ||
| 1259 | * @returns String corresponding to a half float pair. | ||
| 1260 | */ | ||
| 1261 | static std::string GetHalfFloat(const std::string& operand, | ||
| 1262 | Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1, | ||
| 1263 | bool abs = false, bool neg = false) { | ||
| 1264 | // "vec2" calls emitted in this function are intended to alias components. | ||
| 1265 | const std::string value = [&]() { | ||
| 1266 | switch (type) { | ||
| 1267 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1268 | return "unpackHalf2x16(" + operand + ')'; | ||
| 1269 | case Tegra::Shader::HalfType::F32: | ||
| 1270 | return "vec2(uintBitsToFloat(" + operand + "))"; | ||
| 1271 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1272 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1273 | const bool high = type == Tegra::Shader::HalfType::H1_H1; | ||
| 1274 | const char unpack_index = "xy"[high ? 1 : 0]; | ||
| 1275 | return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')'; | ||
| 1276 | } | ||
| 1277 | default: | ||
| 1278 | UNREACHABLE(); | ||
| 1279 | return std::string("vec2(0)"); | ||
| 1280 | } | ||
| 1281 | }(); | ||
| 1282 | |||
| 1283 | return GetOperandAbsNeg(value, abs, neg); | ||
| 1284 | } | ||
| 1285 | |||
| 1286 | /* | ||
| 1287 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 1288 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 1289 | */ | ||
| 1290 | bool IsSchedInstruction(u32 offset) const { | ||
| 1291 | // sched instructions appear once every 4 instructions. | ||
| 1292 | static constexpr std::size_t SchedPeriod = 4; | ||
| 1293 | u32 absolute_offset = offset - main_offset; | ||
| 1294 | |||
| 1295 | return (absolute_offset % SchedPeriod) == 0; | ||
| 1296 | } | 376 | } |
| 1297 | 377 | ||
| 1298 | void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, | 378 | void DeclareSamplers() { |
| 1299 | const std::string& op_b, | 379 | const auto& samplers = ir.GetSamplers(); |
| 1300 | Tegra::Shader::PredicateResultMode predicate_mode, | 380 | for (const auto& sampler : samplers) { |
| 1301 | Tegra::Shader::Pred predicate, const bool set_cc) { | 381 | std::string sampler_type = [&]() { |
| 1302 | std::string result{}; | 382 | switch (sampler.GetType()) { |
| 1303 | switch (logic_op) { | 383 | case Tegra::Shader::TextureType::Texture1D: |
| 1304 | case LogicOperation::And: { | 384 | return "sampler1D"; |
| 1305 | result = '(' + op_a + " & " + op_b + ')'; | 385 | case Tegra::Shader::TextureType::Texture2D: |
| 1306 | break; | 386 | return "sampler2D"; |
| 1307 | } | 387 | case Tegra::Shader::TextureType::Texture3D: |
| 1308 | case LogicOperation::Or: { | 388 | return "sampler3D"; |
| 1309 | result = '(' + op_a + " | " + op_b + ')'; | 389 | case Tegra::Shader::TextureType::TextureCube: |
| 1310 | break; | 390 | return "samplerCube"; |
| 1311 | } | 391 | default: |
| 1312 | case LogicOperation::Xor: { | 392 | UNREACHABLE(); |
| 1313 | result = '(' + op_a + " ^ " + op_b + ')'; | 393 | return "sampler2D"; |
| 1314 | break; | 394 | } |
| 1315 | } | 395 | }(); |
| 1316 | case LogicOperation::PassB: { | 396 | if (sampler.IsArray()) |
| 1317 | result = op_b; | 397 | sampler_type += "Array"; |
| 1318 | break; | 398 | if (sampler.IsShadow()) |
| 1319 | } | 399 | sampler_type += "Shadow"; |
| 1320 | default: | ||
| 1321 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | if (dest != Tegra::Shader::Register::ZeroIndex) { | ||
| 1325 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); | ||
| 1326 | } | ||
| 1327 | 400 | ||
| 1328 | using Tegra::Shader::PredicateResultMode; | 401 | code.AddLine("uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); |
| 1329 | // Write the predicate value depending on the predicate mode. | ||
| 1330 | switch (predicate_mode) { | ||
| 1331 | case PredicateResultMode::None: | ||
| 1332 | // Do nothing. | ||
| 1333 | return; | ||
| 1334 | case PredicateResultMode::NotZero: | ||
| 1335 | // Set the predicate to true if the result is not zero. | ||
| 1336 | SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); | ||
| 1337 | break; | ||
| 1338 | default: | ||
| 1339 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", | ||
| 1340 | static_cast<u32>(predicate_mode)); | ||
| 1341 | } | 402 | } |
| 403 | if (!samplers.empty()) | ||
| 404 | code.AddNewLine(); | ||
| 1342 | } | 405 | } |
| 1343 | 406 | ||
| 1344 | void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, | 407 | void VisitBasicBlock(const BasicBlock& bb) { |
| 1345 | const std::string& op_c, const std::string& imm_lut, | 408 | for (const Node node : bb) { |
| 1346 | const bool set_cc) { | 409 | if (const std::string expr = Visit(node); !expr.empty()) { |
| 1347 | if (dest == Tegra::Shader::Register::ZeroIndex) { | 410 | code.AddLine(expr); |
| 1348 | return; | 411 | } |
| 1349 | } | 412 | } |
| 413 | } | ||
| 1350 | 414 | ||
| 1351 | static constexpr std::array<const char*, 32> shift_amounts = { | 415 | std::string Visit(Node node) { |
| 1352 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", | 416 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 1353 | "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", | 417 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); |
| 1354 | "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"}; | 418 | const auto decompiler = operation_decompilers[operation_index]; |
| 1355 | 419 | if (decompiler == nullptr) { | |
| 1356 | std::string result; | 420 | UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); |
| 1357 | result += '('; | 421 | } |
| 1358 | 422 | return (this->*decompiler)(*operation); | |
| 1359 | for (std::size_t i = 0; i < shift_amounts.size(); ++i) { | ||
| 1360 | if (i) | ||
| 1361 | result += '|'; | ||
| 1362 | result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + | ||
| 1363 | ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a + | ||
| 1364 | " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")"; | ||
| 1365 | } | ||
| 1366 | 423 | ||
| 1367 | result += ')'; | 424 | } else if (const auto gpr = std::get_if<GprNode>(node)) { |
| 425 | const u32 index = gpr->GetIndex(); | ||
| 426 | if (index == Register::ZeroIndex) { | ||
| 427 | return "0"; | ||
| 428 | } | ||
| 429 | return GetRegister(index); | ||
| 1368 | 430 | ||
| 1369 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); | 431 | } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { |
| 1370 | } | 432 | const u32 value = immediate->GetValue(); |
| 433 | if (value < 10) { | ||
| 434 | // For eyecandy avoid using hex numbers on single digits | ||
| 435 | return fmt::format("utof({}u)", immediate->GetValue()); | ||
| 436 | } | ||
| 437 | return fmt::format("utof(0x{:x}u)", immediate->GetValue()); | ||
| 1371 | 438 | ||
| 1372 | void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { | 439 | } else if (const auto predicate = std::get_if<PredicateNode>(node)) { |
| 1373 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 440 | const auto value = [&]() -> std::string { |
| 1374 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 441 | switch (const auto index = predicate->GetIndex(); index) { |
| 442 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 443 | return "true"; | ||
| 444 | case Tegra::Shader::Pred::NeverExecute: | ||
| 445 | return "false"; | ||
| 446 | default: | ||
| 447 | return GetPredicate(index); | ||
| 448 | } | ||
| 449 | }(); | ||
| 450 | if (predicate->IsNegated()) { | ||
| 451 | return "!(" + value + ')'; | ||
| 452 | } | ||
| 453 | return value; | ||
| 1375 | 454 | ||
| 1376 | std::size_t written_components = 0; | 455 | } else if (const auto abuf = std::get_if<AbufNode>(node)) { |
| 1377 | for (u32 component = 0; component < 4; ++component) { | 456 | const auto attribute = abuf->GetIndex(); |
| 1378 | if (!instr.texs.IsComponentEnabled(component)) { | 457 | const auto element = abuf->GetElement(); |
| 1379 | continue; | 458 | |
| 459 | const auto GeometryPass = [&](const std::string& name) { | ||
| 460 | if (stage == ShaderStage::Geometry && abuf->GetBuffer()) { | ||
| 461 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | ||
| 462 | // set an 0x80000000 index for those and the shader fails to build. Find out why | ||
| 463 | // this happens and what's its intent. | ||
| 464 | return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) + | ||
| 465 | ") % MAX_VERTEX_INPUT]"; | ||
| 466 | } | ||
| 467 | return name; | ||
| 468 | }; | ||
| 469 | |||
| 470 | switch (attribute) { | ||
| 471 | case Attribute::Index::Position: | ||
| 472 | if (stage != ShaderStage::Fragment) { | ||
| 473 | return GeometryPass("position") + GetSwizzle(element); | ||
| 474 | } else { | ||
| 475 | return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); | ||
| 476 | } | ||
| 477 | case Attribute::Index::PointCoord: | ||
| 478 | switch (element) { | ||
| 479 | case 0: | ||
| 480 | return "gl_PointCoord.x"; | ||
| 481 | case 1: | ||
| 482 | return "gl_PointCoord.y"; | ||
| 483 | case 2: | ||
| 484 | case 3: | ||
| 485 | return "0"; | ||
| 486 | } | ||
| 487 | UNREACHABLE(); | ||
| 488 | return "0"; | ||
| 489 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 490 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 491 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 492 | // shader. | ||
| 493 | ASSERT(stage == ShaderStage::Vertex); | ||
| 494 | switch (element) { | ||
| 495 | case 2: | ||
| 496 | // Config pack's first value is instance_id. | ||
| 497 | return "uintBitsToFloat(config_pack[0])"; | ||
| 498 | case 3: | ||
| 499 | return "uintBitsToFloat(gl_VertexID)"; | ||
| 500 | } | ||
| 501 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 502 | return "0"; | ||
| 503 | case Attribute::Index::FrontFacing: | ||
| 504 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 505 | ASSERT(stage == ShaderStage::Fragment); | ||
| 506 | switch (element) { | ||
| 507 | case 3: | ||
| 508 | return "itof(gl_FrontFacing ? -1 : 0)"; | ||
| 509 | } | ||
| 510 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | ||
| 511 | return "0"; | ||
| 512 | default: | ||
| 513 | if (attribute >= Attribute::Index::Attribute_0 && | ||
| 514 | attribute <= Attribute::Index::Attribute_31) { | ||
| 515 | return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); | ||
| 516 | } | ||
| 517 | break; | ||
| 1380 | } | 518 | } |
| 519 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | ||
| 520 | |||
| 521 | } else if (const auto cbuf = std::get_if<CbufNode>(node)) { | ||
| 522 | const Node offset = cbuf->GetOffset(); | ||
| 523 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | ||
| 524 | // Direct access | ||
| 525 | const u32 offset_imm = immediate->GetValue(); | ||
| 526 | return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / 4, | ||
| 527 | offset_imm % 4); | ||
| 528 | |||
| 529 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 530 | // Indirect access | ||
| 531 | const std::string final_offset = code.GenerateTemporal(); | ||
| 532 | code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + | ||
| 533 | std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); | ||
| 534 | return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), | ||
| 535 | final_offset, final_offset); | ||
| 1381 | 536 | ||
| 1382 | if (written_components < 2) { | ||
| 1383 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 1384 | regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false, | ||
| 1385 | written_components % 2); | ||
| 1386 | } else { | 537 | } else { |
| 1387 | ASSERT(instr.texs.HasTwoDestinations()); | 538 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 1388 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 1389 | regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false, | ||
| 1390 | written_components % 2); | ||
| 1391 | } | 539 | } |
| 1392 | 540 | ||
| 1393 | ++written_components; | 541 | } else if (const auto lmem = std::get_if<LmemNode>(node)) { |
| 1394 | } | 542 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1395 | } | ||
| 1396 | |||
| 1397 | void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { | ||
| 1398 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 1399 | // float instruction). | ||
| 1400 | 543 | ||
| 1401 | std::array<std::string, 4> components; | 544 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { |
| 1402 | u32 written_components = 0; | 545 | return GetInternalFlag(internal_flag->GetFlag()); |
| 1403 | 546 | ||
| 1404 | for (u32 component = 0; component < 4; ++component) { | 547 | } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { |
| 1405 | if (!instr.texs.IsComponentEnabled(component)) | 548 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 1406 | continue; | 549 | code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {"); |
| 1407 | components[written_components++] = texture + GetSwizzle(component); | 550 | ++code.scope; |
| 1408 | } | ||
| 1409 | if (written_components == 0) | ||
| 1410 | return; | ||
| 1411 | 551 | ||
| 1412 | const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { | 552 | VisitBasicBlock(conditional->GetCode()); |
| 1413 | return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')'; | ||
| 1414 | }; | ||
| 1415 | 553 | ||
| 1416 | regs.SetRegisterToHalfFloat( | 554 | --code.scope; |
| 1417 | instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), | 555 | code.AddLine('}'); |
| 1418 | Tegra::Shader::HalfMerge::H0_H1, 1, 1); | 556 | return {}; |
| 1419 | 557 | ||
| 1420 | if (written_components > 2) { | 558 | } else if (const auto comment = std::get_if<CommentNode>(node)) { |
| 1421 | ASSERT(instr.texs.HasTwoDestinations()); | 559 | return "// " + comment->GetText(); |
| 1422 | regs.SetRegisterToHalfFloat( | ||
| 1423 | instr.gpr28, 0, | ||
| 1424 | BuildComponent(components[2], components[3], written_components > 3), | ||
| 1425 | Tegra::Shader::HalfMerge::H0_H1, 1, 1); | ||
| 1426 | } | 560 | } |
| 561 | UNREACHABLE(); | ||
| 562 | return {}; | ||
| 1427 | } | 563 | } |
| 1428 | 564 | ||
| 1429 | static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { | 565 | std::string ApplyPrecise(Operation operation, const std::string& value) { |
| 1430 | switch (texture_type) { | 566 | if (!IsPrecise(operation)) { |
| 1431 | case Tegra::Shader::TextureType::Texture1D: | 567 | return value; |
| 1432 | return 1; | ||
| 1433 | case Tegra::Shader::TextureType::Texture2D: | ||
| 1434 | return 2; | ||
| 1435 | case Tegra::Shader::TextureType::Texture3D: | ||
| 1436 | case Tegra::Shader::TextureType::TextureCube: | ||
| 1437 | return 3; | ||
| 1438 | default: | ||
| 1439 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 1440 | return 0; | ||
| 1441 | } | 568 | } |
| 1442 | } | 569 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 1443 | 570 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | |
| 1444 | /* | ||
| 1445 | * Emits code to push the input target address to the flow address stack, incrementing the stack | ||
| 1446 | * top. | ||
| 1447 | */ | ||
| 1448 | void EmitPushToFlowStack(u32 target) { | ||
| 1449 | const auto scope = shader.Scope(); | ||
| 1450 | 571 | ||
| 1451 | shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); | 572 | const std::string temporal = code.GenerateTemporal(); |
| 1452 | shader.AddLine("flow_stack_top++;"); | 573 | code.AddLine(precise + "float " + temporal + " = " + value + ';'); |
| 574 | return temporal; | ||
| 1453 | } | 575 | } |
| 1454 | 576 | ||
| 1455 | /* | 577 | std::string VisitOperand(Operation operation, std::size_t operand_index) { |
| 1456 | * Emits code to pop an address from the flow address stack, setting the jump address to the | 578 | const auto& operand = operation[operand_index]; |
| 1457 | * popped address and decrementing the stack top. | 579 | const bool parent_precise = IsPrecise(operation); |
| 1458 | */ | 580 | const bool child_precise = IsPrecise(operand); |
| 1459 | void EmitPopFromFlowStack() { | 581 | const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); |
| 1460 | const auto scope = shader.Scope(); | 582 | if (!parent_precise || child_precise || child_trivial) { |
| 583 | return Visit(operand); | ||
| 584 | } | ||
| 1461 | 585 | ||
| 1462 | shader.AddLine("flow_stack_top--;"); | 586 | const std::string temporal = code.GenerateTemporal(); |
| 1463 | shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); | 587 | code.AddLine("float " + temporal + " = " + Visit(operand) + ';'); |
| 1464 | shader.AddLine("break;"); | 588 | return temporal; |
| 1465 | } | 589 | } |
| 1466 | 590 | ||
| 1467 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. | 591 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { |
| 1468 | void EmitFragmentOutputsWrite() { | 592 | std::string value = VisitOperand(operation, operand_index); |
| 1469 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||
| 1470 | |||
| 1471 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented"); | ||
| 1472 | 593 | ||
| 1473 | shader.AddLine("if (alpha_test[0] != 0) {"); | 594 | switch (type) { |
| 1474 | ++shader.scope; | 595 | case Type::Bool: |
| 1475 | // We start on the register containing the alpha value in the first RT. | 596 | case Type::Bool2: |
| 1476 | u32 current_reg = 3; | 597 | case Type::Float: |
| 1477 | for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; | 598 | return value; |
| 1478 | ++render_target) { | 599 | case Type::Int: |
| 1479 | // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when | 600 | return "ftoi(" + value + ')'; |
| 1480 | // multiple render targets are used. | 601 | case Type::Uint: |
| 1481 | if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || | 602 | return "ftou(" + value + ')'; |
| 1482 | header.ps.IsColorComponentOutputEnabled(render_target, 1) || | 603 | case Type::HalfFloat: |
| 1483 | header.ps.IsColorComponentOutputEnabled(render_target, 2) || | 604 | const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); |
| 1484 | header.ps.IsColorComponentOutputEnabled(render_target, 3)) { | 605 | if (!half_meta) { |
| 1485 | shader.AddLine(fmt::format("if (!AlphaFunc({})) discard;", | 606 | value = "toHalf2(" + value + ')'; |
| 1486 | regs.GetRegisterAsFloat(current_reg))); | ||
| 1487 | current_reg += 4; | ||
| 1488 | } | 607 | } |
| 1489 | } | ||
| 1490 | --shader.scope; | ||
| 1491 | shader.AddLine('}'); | ||
| 1492 | 608 | ||
| 1493 | // Write the color outputs using the data in the shader registers, disabled | 609 | switch (half_meta->types.at(operand_index)) { |
| 1494 | // rendertargets/components are skipped in the register assignment. | 610 | case Tegra::Shader::HalfType::H0_H1: |
| 1495 | current_reg = 0; | 611 | return "toHalf2(" + value + ')'; |
| 1496 | for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; | 612 | case Tegra::Shader::HalfType::F32: |
| 1497 | ++render_target) { | 613 | return "vec2(" + value + ')'; |
| 1498 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 614 | case Tegra::Shader::HalfType::H0_H0: |
| 1499 | for (u32 component = 0; component < 4; ++component) { | 615 | return "vec2(toHalf2(" + value + ")[0])"; |
| 1500 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 616 | case Tegra::Shader::HalfType::H1_H1: |
| 1501 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | 617 | return "vec2(toHalf2(" + value + ")[1])"; |
| 1502 | regs.GetRegisterAsFloat(current_reg))); | ||
| 1503 | ++current_reg; | ||
| 1504 | } | ||
| 1505 | } | 618 | } |
| 1506 | } | 619 | } |
| 1507 | 620 | UNREACHABLE(); | |
| 1508 | if (header.ps.omap.depth) { | 621 | return value; |
| 1509 | // The depth output is always 2 registers after the last color output, and current_reg | ||
| 1510 | // already contains one past the last color register. | ||
| 1511 | |||
| 1512 | shader.AddLine( | ||
| 1513 | "gl_FragDepth = " + | ||
| 1514 | regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) + | ||
| 1515 | ';'); | ||
| 1516 | } | ||
| 1517 | } | 622 | } |
| 1518 | 623 | ||
| 1519 | /// Unpacks a video instruction operand (e.g. VMAD). | 624 | std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { |
| 1520 | std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed, | 625 | switch (type) { |
| 1521 | Tegra::Shader::VideoType type, u64 byte_height) { | 626 | case Type::Bool: |
| 1522 | const std::string value = [&]() { | 627 | case Type::Float: |
| 1523 | if (!is_chunk) { | 628 | if (needs_parenthesis) { |
| 1524 | const auto offset = static_cast<u32>(byte_height * 8); | 629 | return '(' + value + ')'; |
| 1525 | return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; | ||
| 1526 | } | ||
| 1527 | const std::string zero = "0"; | ||
| 1528 | |||
| 1529 | switch (type) { | ||
| 1530 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 1531 | return '(' + op + " & 0xffff)"; | ||
| 1532 | case Tegra::Shader::VideoType::Size16_High: | ||
| 1533 | return '(' + op + " >> 16)"; | ||
| 1534 | case Tegra::Shader::VideoType::Size32: | ||
| 1535 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||
| 1536 | // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||
| 1537 | // explanation is found: abort. | ||
| 1538 | UNIMPLEMENTED(); | ||
| 1539 | return zero; | ||
| 1540 | case Tegra::Shader::VideoType::Invalid: | ||
| 1541 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 1542 | return zero; | ||
| 1543 | default: | ||
| 1544 | UNREACHABLE(); | ||
| 1545 | return zero; | ||
| 1546 | } | 630 | } |
| 1547 | }(); | 631 | return value; |
| 1548 | 632 | case Type::Int: | |
| 1549 | if (is_signed) { | 633 | return "itof(" + value + ')'; |
| 1550 | return "int(" + value + ')'; | 634 | case Type::Uint: |
| 1551 | } | 635 | return "utof(" + value + ')'; |
| 636 | case Type::HalfFloat: | ||
| 637 | return "fromHalf2(" + value + ')'; | ||
| 638 | } | ||
| 639 | UNREACHABLE(); | ||
| 1552 | return value; | 640 | return value; |
| 1553 | }; | ||
| 1554 | |||
| 1555 | /// Gets the A operand for a video instruction. | ||
| 1556 | std::string GetVideoOperandA(Instruction instr) { | ||
| 1557 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||
| 1558 | instr.video.is_byte_chunk_a != 0, instr.video.signed_a, | ||
| 1559 | instr.video.type_a, instr.video.byte_height_a); | ||
| 1560 | } | 641 | } |
| 1561 | 642 | ||
| 1562 | /// Gets the B operand for a video instruction. | 643 | std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, |
| 1563 | std::string GetVideoOperandB(Instruction instr) { | 644 | Type type_a, bool needs_parenthesis = true) { |
| 1564 | if (instr.video.use_register_b) { | 645 | return ApplyPrecise(operation, |
| 1565 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | 646 | BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')', |
| 1566 | instr.video.is_byte_chunk_b != 0, instr.video.signed_b, | 647 | result_type, needs_parenthesis)); |
| 1567 | instr.video.type_b, instr.video.byte_height_b); | ||
| 1568 | } else { | ||
| 1569 | return '(' + | ||
| 1570 | std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||
| 1571 | : instr.alu.GetImm20_16()) + | ||
| 1572 | ')'; | ||
| 1573 | } | ||
| 1574 | } | 648 | } |
| 1575 | 649 | ||
| 1576 | std::pair<size_t, std::string> ValidateAndGetCoordinateElement( | 650 | std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, |
| 1577 | const Tegra::Shader::TextureType texture_type, const bool depth_compare, | 651 | Type type_a, Type type_b) { |
| 1578 | const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) { | 652 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 1579 | const size_t coord_count = TextureCoordinates(texture_type); | 653 | const std::string op_b = VisitOperand(operation, 1, type_b); |
| 1580 | |||
| 1581 | size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 1582 | const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 1583 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 1584 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 1585 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 1586 | } | ||
| 1587 | // 1D.DC opengl is using a vec3 but 2nd component is ignored later. | ||
| 1588 | total_coord_count += | ||
| 1589 | (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) | ||
| 1590 | ? 1 | ||
| 1591 | : 0; | ||
| 1592 | |||
| 1593 | constexpr std::array<const char*, 5> coord_container{ | ||
| 1594 | {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(", | ||
| 1595 | "vec4 coord = vec4("}}; | ||
| 1596 | |||
| 1597 | return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]); | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | std::string GetTextureCode(const Tegra::Shader::Instruction& instr, | ||
| 1601 | const Tegra::Shader::TextureType texture_type, | ||
| 1602 | const Tegra::Shader::TextureProcessMode process_mode, | ||
| 1603 | const bool depth_compare, const bool is_array, | ||
| 1604 | const size_t bias_offset) { | ||
| 1605 | |||
| 1606 | if ((texture_type == Tegra::Shader::TextureType::Texture3D && | ||
| 1607 | (is_array || depth_compare)) || | ||
| 1608 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && | ||
| 1609 | depth_compare)) { | ||
| 1610 | UNIMPLEMENTED_MSG("This method is not supported."); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | const std::string sampler = | ||
| 1614 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 1615 | |||
| 1616 | const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ || | ||
| 1617 | process_mode == Tegra::Shader::TextureProcessMode::LL || | ||
| 1618 | process_mode == Tegra::Shader::TextureProcessMode::LLA; | ||
| 1619 | |||
| 1620 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | ||
| 1621 | // sampler2DArrayShadow and samplerCubeArrayShadow. | ||
| 1622 | const bool gl_lod_supported = !( | ||
| 1623 | (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || | ||
| 1624 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); | ||
| 1625 | |||
| 1626 | const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture("; | ||
| 1627 | std::string texture = read_method + sampler + ", coord"; | ||
| 1628 | |||
| 1629 | UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1630 | !gl_lod_supported); | ||
| 1631 | 654 | ||
| 1632 | if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) { | 655 | return ApplyPrecise( |
| 1633 | if (process_mode == Tegra::Shader::TextureProcessMode::LZ) { | 656 | operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type)); |
| 1634 | texture += ", 0.0"; | ||
| 1635 | } else { | ||
| 1636 | // If present, lod or bias are always stored in the register indexed by the | ||
| 1637 | // gpr20 | ||
| 1638 | // field with an offset depending on the usage of the other registers | ||
| 1639 | texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset); | ||
| 1640 | } | ||
| 1641 | } | ||
| 1642 | texture += ")"; | ||
| 1643 | return texture; | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | std::pair<std::string, std::string> GetTEXCode( | ||
| 1647 | const Instruction& instr, const Tegra::Shader::TextureType texture_type, | ||
| 1648 | const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, | ||
| 1649 | const bool is_array) { | ||
| 1650 | const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1651 | process_mode != Tegra::Shader::TextureProcessMode::LZ); | ||
| 1652 | |||
| 1653 | const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( | ||
| 1654 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 1655 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1656 | const u64 array_register = instr.gpr8.Value(); | ||
| 1657 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 1658 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1659 | |||
| 1660 | std::string coord = coord_dcl; | ||
| 1661 | for (size_t i = 0; i < coord_count;) { | ||
| 1662 | coord += regs.GetRegisterAsFloat(coord_register + i); | ||
| 1663 | ++i; | ||
| 1664 | if (i != coord_count) { | ||
| 1665 | coord += ','; | ||
| 1666 | } | ||
| 1667 | } | ||
| 1668 | // 1D.DC in opengl the 2nd component is ignored. | ||
| 1669 | if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) { | ||
| 1670 | coord += ",0.0"; | ||
| 1671 | } | ||
| 1672 | if (is_array) { | ||
| 1673 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1674 | } | ||
| 1675 | if (depth_compare) { | ||
| 1676 | // Depth is always stored in the register signaled by gpr20 | ||
| 1677 | // or in the next register if lod or bias are used | ||
| 1678 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 1679 | coord += ',' + regs.GetRegisterAsFloat(depth_register); | ||
| 1680 | } | ||
| 1681 | coord += ");"; | ||
| 1682 | return std::make_pair( | ||
| 1683 | coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0)); | ||
| 1684 | } | ||
| 1685 | |||
| 1686 | std::pair<std::string, std::string> GetTEXSCode( | ||
| 1687 | const Instruction& instr, const Tegra::Shader::TextureType texture_type, | ||
| 1688 | const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, | ||
| 1689 | const bool is_array) { | ||
| 1690 | const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1691 | process_mode != Tegra::Shader::TextureProcessMode::LZ); | ||
| 1692 | |||
| 1693 | const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( | ||
| 1694 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 1695 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1696 | const u64 array_register = instr.gpr8.Value(); | ||
| 1697 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 1698 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1699 | const u64 last_coord_register = | ||
| 1700 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 1701 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 1702 | : coord_register + 1; | ||
| 1703 | |||
| 1704 | std::string coord = coord_dcl; | ||
| 1705 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 1706 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 1707 | coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i); | ||
| 1708 | if (i < coord_count - 1) { | ||
| 1709 | coord += ','; | ||
| 1710 | } | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | if (is_array) { | ||
| 1714 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1715 | } | ||
| 1716 | if (depth_compare) { | ||
| 1717 | // Depth is always stored in the register signaled by gpr20 | ||
| 1718 | // or in the next register if lod or bias are used | ||
| 1719 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 1720 | coord += ',' + regs.GetRegisterAsFloat(depth_register); | ||
| 1721 | } | ||
| 1722 | coord += ");"; | ||
| 1723 | |||
| 1724 | return std::make_pair(coord, | ||
| 1725 | GetTextureCode(instr, texture_type, process_mode, depth_compare, | ||
| 1726 | is_array, (coord_count > 2 ? 1 : 0))); | ||
| 1727 | } | 657 | } |
| 1728 | 658 | ||
| 1729 | std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr, | 659 | std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, |
| 1730 | const Tegra::Shader::TextureType texture_type, | 660 | Type type_a, Type type_b) { |
| 1731 | const bool depth_compare, const bool is_array) { | 661 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 1732 | 662 | const std::string op_b = VisitOperand(operation, 1, type_b); | |
| 1733 | const size_t coord_count = TextureCoordinates(texture_type); | ||
| 1734 | const size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 1735 | const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 1736 | |||
| 1737 | constexpr std::array<const char*, 5> coord_container{ | ||
| 1738 | {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}}; | ||
| 1739 | 663 | ||
| 1740 | // If enabled arrays index is always stored in the gpr8 field | 664 | return ApplyPrecise(operation, |
| 1741 | const u64 array_register = instr.gpr8.Value(); | 665 | BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type)); |
| 1742 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 1743 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1744 | |||
| 1745 | std::string coord = coord_container[total_coord_count]; | ||
| 1746 | for (size_t i = 0; i < coord_count;) { | ||
| 1747 | coord += regs.GetRegisterAsFloat(coord_register + i); | ||
| 1748 | ++i; | ||
| 1749 | if (i != coord_count) { | ||
| 1750 | coord += ','; | ||
| 1751 | } | ||
| 1752 | } | ||
| 1753 | |||
| 1754 | if (is_array) { | ||
| 1755 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1756 | } | ||
| 1757 | coord += ");"; | ||
| 1758 | |||
| 1759 | const std::string sampler = | ||
| 1760 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 1761 | |||
| 1762 | std::string texture = "textureGather(" + sampler + ", coord, "; | ||
| 1763 | if (depth_compare) { | ||
| 1764 | // Depth is always stored in the register signaled by gpr20 | ||
| 1765 | texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')'; | ||
| 1766 | } else { | ||
| 1767 | texture += std::to_string(instr.tld4.component) + ')'; | ||
| 1768 | } | ||
| 1769 | return std::make_pair(coord, texture); | ||
| 1770 | } | 666 | } |
| 1771 | 667 | ||
| 1772 | std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr, | 668 | std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, |
| 1773 | const Tegra::Shader::TextureType texture_type, | 669 | Type type_a, Type type_b, Type type_c) { |
| 1774 | const bool is_array) { | 670 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 1775 | 671 | const std::string op_b = VisitOperand(operation, 1, type_b); | |
| 1776 | const size_t coord_count = TextureCoordinates(texture_type); | 672 | const std::string op_c = VisitOperand(operation, 2, type_c); |
| 1777 | const size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 1778 | const bool lod_enabled = | ||
| 1779 | instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL; | ||
| 1780 | |||
| 1781 | constexpr std::array<const char*, 4> coord_container{ | ||
| 1782 | {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}}; | ||
| 1783 | |||
| 1784 | std::string coord = coord_container[total_coord_count]; | ||
| 1785 | |||
| 1786 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1787 | const u64 array_register = instr.gpr8.Value(); | ||
| 1788 | |||
| 1789 | // if is array gpr20 is used | ||
| 1790 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 1791 | |||
| 1792 | const u64 last_coord_register = | ||
| 1793 | ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array | ||
| 1794 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 1795 | : coord_register + 1; | ||
| 1796 | 673 | ||
| 1797 | for (size_t i = 0; i < coord_count; ++i) { | 674 | return ApplyPrecise( |
| 1798 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | 675 | operation, |
| 1799 | coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i); | 676 | BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type)); |
| 1800 | if (i < coord_count - 1) { | 677 | } |
| 1801 | coord += ','; | ||
| 1802 | } | ||
| 1803 | } | ||
| 1804 | if (is_array) { | ||
| 1805 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1806 | } | ||
| 1807 | coord += ");"; | ||
| 1808 | |||
| 1809 | const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 1810 | |||
| 1811 | std::string texture = "texelFetch(" + sampler + ", coords"; | ||
| 1812 | |||
| 1813 | if (lod_enabled) { | ||
| 1814 | // When lod is used always is in grp20 | ||
| 1815 | texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')'; | ||
| 1816 | } else { | ||
| 1817 | texture += ", 0)"; | ||
| 1818 | } | ||
| 1819 | return std::make_pair(coord, texture); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | /** | ||
| 1823 | * Compiles a single instruction from Tegra to GLSL. | ||
| 1824 | * @param offset the offset of the Tegra shader instruction. | ||
| 1825 | * @return the offset of the next instruction to execute. Usually it is the current offset | ||
| 1826 | * + 1. If the current instruction always terminates the program, returns PROGRAM_END. | ||
| 1827 | */ | ||
| 1828 | u32 CompileInstr(u32 offset) { | ||
| 1829 | // Ignore sched instructions when generating code. | ||
| 1830 | if (IsSchedInstruction(offset)) { | ||
| 1831 | return offset + 1; | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | const Instruction instr = {program_code[offset]}; | ||
| 1835 | const auto opcode = OpCode::Decode(instr); | ||
| 1836 | |||
| 1837 | // Decoding failure | ||
| 1838 | if (!opcode) { | ||
| 1839 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 1840 | return offset + 1; | ||
| 1841 | } | ||
| 1842 | |||
| 1843 | shader.AddLine( | ||
| 1844 | fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value)); | ||
| 1845 | |||
| 1846 | using Tegra::Shader::Pred; | ||
| 1847 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 1848 | "NeverExecute predicate not implemented"); | ||
| 1849 | 678 | ||
| 1850 | // Some instructions (like SSY) don't have a predicate field, they are always | 679 | std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, |
| 1851 | // unconditionally executed. | 680 | Type type_a, Type type_b, Type type_c, Type type_d) { |
| 1852 | bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | 681 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 682 | const std::string op_b = VisitOperand(operation, 1, type_b); | ||
| 683 | const std::string op_c = VisitOperand(operation, 2, type_c); | ||
| 684 | const std::string op_d = VisitOperand(operation, 3, type_d); | ||
| 1853 | 685 | ||
| 1854 | if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { | 686 | return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + |
| 1855 | shader.AddLine("if (" + | 687 | op_c + ", " + op_d + ')', |
| 1856 | GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + | 688 | result_type)); |
| 1857 | ')'); | 689 | } |
| 1858 | shader.AddLine('{'); | ||
| 1859 | ++shader.scope; | ||
| 1860 | } | ||
| 1861 | 690 | ||
| 1862 | switch (opcode->get().GetType()) { | 691 | std::string GenerateTexture(Operation operation, const std::string& func, |
| 1863 | case OpCode::Type::Arithmetic: { | 692 | bool is_extra_int = false) { |
| 1864 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 693 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 1865 | 694 | ||
| 1866 | std::string op_b; | 695 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 696 | const auto count = static_cast<u32>(operation.GetOperandsCount()); | ||
| 697 | ASSERT(meta); | ||
| 1867 | 698 | ||
| 1868 | if (instr.is_b_imm) { | 699 | std::string expr = func; |
| 1869 | op_b = GetImmediate19(instr); | 700 | expr += '('; |
| 1870 | } else { | 701 | expr += GetSampler(meta->sampler); |
| 1871 | if (instr.is_b_gpr) { | 702 | expr += ", "; |
| 1872 | op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 1873 | } else { | ||
| 1874 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1875 | GLSLRegister::Type::Float); | ||
| 1876 | } | ||
| 1877 | } | ||
| 1878 | 703 | ||
| 1879 | switch (opcode->get().GetId()) { | 704 | expr += coord_constructors[meta->coords_count - 1]; |
| 1880 | case OpCode::Id::MOV_C: | 705 | expr += '('; |
| 1881 | case OpCode::Id::MOV_R: { | 706 | for (u32 i = 0; i < count; ++i) { |
| 1882 | // MOV does not have neither 'abs' nor 'neg' bits. | 707 | const bool is_extra = i >= meta->coords_count; |
| 1883 | regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); | 708 | const bool is_array = i == meta->array_index; |
| 1884 | break; | ||
| 1885 | } | ||
| 1886 | 709 | ||
| 1887 | case OpCode::Id::FMUL_C: | 710 | std::string operand = [&]() { |
| 1888 | case OpCode::Id::FMUL_R: | 711 | if (is_extra && is_extra_int) { |
| 1889 | case OpCode::Id::FMUL_IMM: { | 712 | if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) { |
| 1890 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | 713 | return std::to_string(static_cast<s32>(immediate->GetValue())); |
| 1891 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, | ||
| 1892 | "FMUL tab5cb8_2({}) is not implemented", | ||
| 1893 | instr.fmul.tab5cb8_2.Value()); | ||
| 1894 | UNIMPLEMENTED_IF_MSG( | ||
| 1895 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | ||
| 1896 | instr.fmul.tab5c68_0 | ||
| 1897 | .Value()); // SMO typical sends 1 here which seems to be the default | ||
| 1898 | |||
| 1899 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); | ||
| 1900 | |||
| 1901 | std::string postfactor_op; | ||
| 1902 | if (instr.fmul.postfactor != 0) { | ||
| 1903 | s8 postfactor = static_cast<s8>(instr.fmul.postfactor); | ||
| 1904 | |||
| 1905 | // postfactor encoded as 3-bit 1's complement in instruction, | ||
| 1906 | // interpreted with below logic. | ||
| 1907 | if (postfactor >= 4) { | ||
| 1908 | postfactor = 7 - postfactor; | ||
| 1909 | } else { | 714 | } else { |
| 1910 | postfactor = 0 - postfactor; | 715 | return "ftoi(" + Visit(operation[i]) + ')'; |
| 1911 | } | 716 | } |
| 1912 | |||
| 1913 | if (postfactor > 0) { | ||
| 1914 | postfactor_op = " * " + std::to_string(1 << postfactor); | ||
| 1915 | } else { | ||
| 1916 | postfactor_op = " / " + std::to_string(1 << -postfactor); | ||
| 1917 | } | ||
| 1918 | } | ||
| 1919 | |||
| 1920 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1, | ||
| 1921 | instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 1922 | break; | ||
| 1923 | } | ||
| 1924 | case OpCode::Id::FADD_C: | ||
| 1925 | case OpCode::Id::FADD_R: | ||
| 1926 | case OpCode::Id::FADD_IMM: { | ||
| 1927 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1928 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1929 | |||
| 1930 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, | ||
| 1931 | instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 1932 | break; | ||
| 1933 | } | ||
| 1934 | case OpCode::Id::MUFU: { | ||
| 1935 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1936 | switch (instr.sub_op) { | ||
| 1937 | case SubOp::Cos: | ||
| 1938 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | ||
| 1939 | instr.alu.saturate_d, false, 0, true); | ||
| 1940 | break; | ||
| 1941 | case SubOp::Sin: | ||
| 1942 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | ||
| 1943 | instr.alu.saturate_d, false, 0, true); | ||
| 1944 | break; | ||
| 1945 | case SubOp::Ex2: | ||
| 1946 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | ||
| 1947 | instr.alu.saturate_d, false, 0, true); | ||
| 1948 | break; | ||
| 1949 | case SubOp::Lg2: | ||
| 1950 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | ||
| 1951 | instr.alu.saturate_d, false, 0, true); | ||
| 1952 | break; | ||
| 1953 | case SubOp::Rcp: | ||
| 1954 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, | ||
| 1955 | instr.alu.saturate_d, false, 0, true); | ||
| 1956 | break; | ||
| 1957 | case SubOp::Rsq: | ||
| 1958 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | ||
| 1959 | instr.alu.saturate_d, false, 0, true); | ||
| 1960 | break; | ||
| 1961 | case SubOp::Sqrt: | ||
| 1962 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, | ||
| 1963 | instr.alu.saturate_d, false, 0, true); | ||
| 1964 | break; | ||
| 1965 | default: | ||
| 1966 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", | ||
| 1967 | static_cast<unsigned>(instr.sub_op.Value())); | ||
| 1968 | } | ||
| 1969 | break; | ||
| 1970 | } | ||
| 1971 | case OpCode::Id::FMNMX_C: | ||
| 1972 | case OpCode::Id::FMNMX_R: | ||
| 1973 | case OpCode::Id::FMNMX_IMM: { | ||
| 1974 | UNIMPLEMENTED_IF_MSG( | ||
| 1975 | instr.generates_cc, | ||
| 1976 | "Condition codes generation in FMNMX is partially implemented"); | ||
| 1977 | |||
| 1978 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1979 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1980 | |||
| 1981 | std::string condition = | ||
| 1982 | GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 1983 | std::string parameters = op_a + ',' + op_b; | ||
| 1984 | regs.SetRegisterToFloat(instr.gpr0, 0, | ||
| 1985 | '(' + condition + ") ? min(" + parameters + ") : max(" + | ||
| 1986 | parameters + ')', | ||
| 1987 | 1, 1, false, instr.generates_cc, 0, true); | ||
| 1988 | break; | ||
| 1989 | } | ||
| 1990 | case OpCode::Id::RRO_C: | ||
| 1991 | case OpCode::Id::RRO_R: | ||
| 1992 | case OpCode::Id::RRO_IMM: { | ||
| 1993 | // Currently RRO is only implemented as a register move. | ||
| 1994 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1995 | regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); | ||
| 1996 | LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); | ||
| 1997 | break; | ||
| 1998 | } | ||
| 1999 | default: { | ||
| 2000 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 2001 | } | ||
| 2002 | } | ||
| 2003 | break; | ||
| 2004 | } | ||
| 2005 | case OpCode::Type::ArithmeticImmediate: { | ||
| 2006 | switch (opcode->get().GetId()) { | ||
| 2007 | case OpCode::Id::MOV32_IMM: { | ||
| 2008 | regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1); | ||
| 2009 | break; | ||
| 2010 | } | ||
| 2011 | case OpCode::Id::FMUL32_IMM: { | ||
| 2012 | regs.SetRegisterToFloat( | ||
| 2013 | instr.gpr0, 0, | ||
| 2014 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1, | ||
| 2015 | instr.fmul32.saturate, instr.op_32.generates_cc, 0, true); | ||
| 2016 | break; | ||
| 2017 | } | ||
| 2018 | case OpCode::Id::FADD32I: { | ||
| 2019 | UNIMPLEMENTED_IF_MSG( | ||
| 2020 | instr.op_32.generates_cc, | ||
| 2021 | "Condition codes generation in FADD32I is partially implemented"); | ||
| 2022 | |||
| 2023 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2024 | std::string op_b = GetImmediate32(instr); | ||
| 2025 | |||
| 2026 | if (instr.fadd32i.abs_a) { | ||
| 2027 | op_a = "abs(" + op_a + ')'; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | if (instr.fadd32i.negate_a) { | ||
| 2031 | op_a = "-(" + op_a + ')'; | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | if (instr.fadd32i.abs_b) { | ||
| 2035 | op_b = "abs(" + op_b + ')'; | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | if (instr.fadd32i.negate_b) { | ||
| 2039 | op_b = "-(" + op_b + ')'; | ||
| 2040 | } | ||
| 2041 | |||
| 2042 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, | ||
| 2043 | instr.op_32.generates_cc, 0, true); | ||
| 2044 | break; | ||
| 2045 | } | ||
| 2046 | } | ||
| 2047 | break; | ||
| 2048 | } | ||
| 2049 | case OpCode::Type::Bfe: { | ||
| 2050 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 2051 | |||
| 2052 | std::string op_a = instr.bfe.negate_a ? "-" : ""; | ||
| 2053 | op_a += regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2054 | |||
| 2055 | switch (opcode->get().GetId()) { | ||
| 2056 | case OpCode::Id::BFE_IMM: { | ||
| 2057 | std::string inner_shift = | ||
| 2058 | '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; | ||
| 2059 | std::string outer_shift = | ||
| 2060 | '(' + inner_shift + " >> " + | ||
| 2061 | std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; | ||
| 2062 | |||
| 2063 | regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false, | ||
| 2064 | instr.generates_cc); | ||
| 2065 | break; | ||
| 2066 | } | ||
| 2067 | default: { | ||
| 2068 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | ||
| 2069 | } | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | break; | ||
| 2073 | } | ||
| 2074 | case OpCode::Type::Bfi: { | ||
| 2075 | const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { | ||
| 2076 | switch (opcode->get().GetId()) { | ||
| 2077 | case OpCode::Id::BFI_IMM_R: | ||
| 2078 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2079 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 2080 | default: | ||
| 2081 | UNREACHABLE(); | ||
| 2082 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2083 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 2084 | } | ||
| 2085 | }(); | ||
| 2086 | const std::string offset = '(' + packed_shift + " & 0xff)"; | ||
| 2087 | const std::string bits = "((" + packed_shift + " >> 8) & 0xff)"; | ||
| 2088 | const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false); | ||
| 2089 | regs.SetRegisterToInteger(instr.gpr0, false, 0, | ||
| 2090 | "bitfieldInsert(" + base + ", " + insert + ", " + offset + | ||
| 2091 | ", " + bits + ')', | ||
| 2092 | 1, 1, false, instr.generates_cc); | ||
| 2093 | break; | ||
| 2094 | } | ||
| 2095 | case OpCode::Type::Shift: { | ||
| 2096 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); | ||
| 2097 | std::string op_b; | ||
| 2098 | |||
| 2099 | if (instr.is_b_imm) { | ||
| 2100 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 2101 | } else { | ||
| 2102 | if (instr.is_b_gpr) { | ||
| 2103 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2104 | } else { | 717 | } else { |
| 2105 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | 718 | return Visit(operation[i]); |
| 2106 | GLSLRegister::Type::Integer); | ||
| 2107 | } | 719 | } |
| 720 | }(); | ||
| 721 | if (is_array) { | ||
| 722 | ASSERT(!is_extra); | ||
| 723 | operand = "float(ftoi(" + operand + "))"; | ||
| 2108 | } | 724 | } |
| 2109 | 725 | ||
| 2110 | switch (opcode->get().GetId()) { | 726 | expr += operand; |
| 2111 | case OpCode::Id::SHR_C: | ||
| 2112 | case OpCode::Id::SHR_R: | ||
| 2113 | case OpCode::Id::SHR_IMM: { | ||
| 2114 | if (!instr.shift.is_signed) { | ||
| 2115 | // Logical shift right | ||
| 2116 | op_a = "uint(" + op_a + ')'; | ||
| 2117 | } | ||
| 2118 | 727 | ||
| 2119 | // Cast to int is superfluous for arithmetic shift, it's only for a logical shift | 728 | if (i + 1 == meta->coords_count) { |
| 2120 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', | 729 | expr += ')'; |
| 2121 | 1, 1, false, instr.generates_cc); | ||
| 2122 | break; | ||
| 2123 | } | ||
| 2124 | case OpCode::Id::SHL_C: | ||
| 2125 | case OpCode::Id::SHL_R: | ||
| 2126 | case OpCode::Id::SHL_IMM: | ||
| 2127 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2128 | "Condition codes generation in SHL is not implemented"); | ||
| 2129 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false, | ||
| 2130 | instr.generates_cc); | ||
| 2131 | break; | ||
| 2132 | default: { | ||
| 2133 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 2134 | } | 730 | } |
| 731 | if (i + 1 < count) { | ||
| 732 | expr += ", "; | ||
| 2135 | } | 733 | } |
| 2136 | break; | ||
| 2137 | } | 734 | } |
| 2138 | case OpCode::Type::ArithmeticIntegerImmediate: { | 735 | expr += ')'; |
| 2139 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); | 736 | return expr; |
| 2140 | std::string op_b = std::to_string(instr.alu.imm20_32.Value()); | 737 | } |
| 2141 | |||
| 2142 | switch (opcode->get().GetId()) { | ||
| 2143 | case OpCode::Id::IADD32I: | ||
| 2144 | UNIMPLEMENTED_IF_MSG( | ||
| 2145 | instr.op_32.generates_cc, | ||
| 2146 | "Condition codes generation in IADD32I is partially implemented"); | ||
| 2147 | |||
| 2148 | if (instr.iadd32i.negate_a) | ||
| 2149 | op_a = "-(" + op_a + ')'; | ||
| 2150 | |||
| 2151 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, | ||
| 2152 | instr.iadd32i.saturate, instr.op_32.generates_cc); | ||
| 2153 | break; | ||
| 2154 | case OpCode::Id::LOP32I: { | ||
| 2155 | |||
| 2156 | if (instr.alu.lop32i.invert_a) | ||
| 2157 | op_a = "~(" + op_a + ')'; | ||
| 2158 | |||
| 2159 | if (instr.alu.lop32i.invert_b) | ||
| 2160 | op_b = "~(" + op_b + ')'; | ||
| 2161 | |||
| 2162 | WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, | ||
| 2163 | Tegra::Shader::PredicateResultMode::None, | ||
| 2164 | Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc); | ||
| 2165 | break; | ||
| 2166 | } | ||
| 2167 | default: { | ||
| 2168 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 2169 | opcode->get().GetName()); | ||
| 2170 | } | ||
| 2171 | } | ||
| 2172 | break; | ||
| 2173 | } | ||
| 2174 | case OpCode::Type::ArithmeticInteger: { | ||
| 2175 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2176 | std::string op_b; | ||
| 2177 | if (instr.is_b_imm) { | ||
| 2178 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 2179 | } else { | ||
| 2180 | if (instr.is_b_gpr) { | ||
| 2181 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2182 | } else { | ||
| 2183 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2184 | GLSLRegister::Type::Integer); | ||
| 2185 | } | ||
| 2186 | } | ||
| 2187 | |||
| 2188 | switch (opcode->get().GetId()) { | ||
| 2189 | case OpCode::Id::IADD_C: | ||
| 2190 | case OpCode::Id::IADD_R: | ||
| 2191 | case OpCode::Id::IADD_IMM: { | ||
| 2192 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2193 | "Condition codes generation in IADD is partially implemented"); | ||
| 2194 | |||
| 2195 | if (instr.alu_integer.negate_a) | ||
| 2196 | op_a = "-(" + op_a + ')'; | ||
| 2197 | |||
| 2198 | if (instr.alu_integer.negate_b) | ||
| 2199 | op_b = "-(" + op_b + ')'; | ||
| 2200 | |||
| 2201 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, | ||
| 2202 | instr.alu.saturate_d, instr.generates_cc); | ||
| 2203 | break; | ||
| 2204 | } | ||
| 2205 | case OpCode::Id::IADD3_C: | ||
| 2206 | case OpCode::Id::IADD3_R: | ||
| 2207 | case OpCode::Id::IADD3_IMM: { | ||
| 2208 | UNIMPLEMENTED_IF_MSG( | ||
| 2209 | instr.generates_cc, | ||
| 2210 | "Condition codes generation in IADD3 is partially implemented"); | ||
| 2211 | |||
| 2212 | std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2213 | |||
| 2214 | auto apply_height = [](auto height, auto& oprand) { | ||
| 2215 | switch (height) { | ||
| 2216 | case Tegra::Shader::IAdd3Height::None: | ||
| 2217 | break; | ||
| 2218 | case Tegra::Shader::IAdd3Height::LowerHalfWord: | ||
| 2219 | oprand = "((" + oprand + ") & 0xFFFF)"; | ||
| 2220 | break; | ||
| 2221 | case Tegra::Shader::IAdd3Height::UpperHalfWord: | ||
| 2222 | oprand = "((" + oprand + ") >> 16)"; | ||
| 2223 | break; | ||
| 2224 | default: | ||
| 2225 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", | ||
| 2226 | static_cast<u32>(height.Value())); | ||
| 2227 | } | ||
| 2228 | }; | ||
| 2229 | |||
| 2230 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 2231 | apply_height(instr.iadd3.height_a, op_a); | ||
| 2232 | apply_height(instr.iadd3.height_b, op_b); | ||
| 2233 | apply_height(instr.iadd3.height_c, op_c); | ||
| 2234 | } | ||
| 2235 | |||
| 2236 | if (instr.iadd3.neg_a) | ||
| 2237 | op_a = "-(" + op_a + ')'; | ||
| 2238 | |||
| 2239 | if (instr.iadd3.neg_b) | ||
| 2240 | op_b = "-(" + op_b + ')'; | ||
| 2241 | |||
| 2242 | if (instr.iadd3.neg_c) | ||
| 2243 | op_c = "-(" + op_c + ')'; | ||
| 2244 | |||
| 2245 | std::string result; | ||
| 2246 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 2247 | switch (instr.iadd3.mode) { | ||
| 2248 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 2249 | // TODO(tech4me): According to | ||
| 2250 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 2251 | // The addition between op_a and op_b should be done in uint33, more | ||
| 2252 | // investigation required | ||
| 2253 | result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')'; | ||
| 2254 | break; | ||
| 2255 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 2256 | result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')'; | ||
| 2257 | break; | ||
| 2258 | default: | ||
| 2259 | result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; | ||
| 2260 | break; | ||
| 2261 | } | ||
| 2262 | } else { | ||
| 2263 | result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false, | ||
| 2267 | instr.generates_cc); | ||
| 2268 | break; | ||
| 2269 | } | ||
| 2270 | case OpCode::Id::ISCADD_C: | ||
| 2271 | case OpCode::Id::ISCADD_R: | ||
| 2272 | case OpCode::Id::ISCADD_IMM: { | ||
| 2273 | UNIMPLEMENTED_IF_MSG( | ||
| 2274 | instr.generates_cc, | ||
| 2275 | "Condition codes generation in ISCADD is partially implemented"); | ||
| 2276 | |||
| 2277 | if (instr.alu_integer.negate_a) | ||
| 2278 | op_a = "-(" + op_a + ')'; | ||
| 2279 | |||
| 2280 | if (instr.alu_integer.negate_b) | ||
| 2281 | op_b = "-(" + op_b + ')'; | ||
| 2282 | |||
| 2283 | const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); | ||
| 2284 | |||
| 2285 | regs.SetRegisterToInteger(instr.gpr0, true, 0, | ||
| 2286 | "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1, | ||
| 2287 | false, instr.generates_cc); | ||
| 2288 | break; | ||
| 2289 | } | ||
| 2290 | case OpCode::Id::POPC_C: | ||
| 2291 | case OpCode::Id::POPC_R: | ||
| 2292 | case OpCode::Id::POPC_IMM: { | ||
| 2293 | if (instr.popc.invert) { | ||
| 2294 | op_b = "~(" + op_b + ')'; | ||
| 2295 | } | ||
| 2296 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1); | ||
| 2297 | break; | ||
| 2298 | } | ||
| 2299 | case OpCode::Id::SEL_C: | ||
| 2300 | case OpCode::Id::SEL_R: | ||
| 2301 | case OpCode::Id::SEL_IMM: { | ||
| 2302 | const std::string condition = | ||
| 2303 | GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 2304 | regs.SetRegisterToInteger(instr.gpr0, true, 0, | ||
| 2305 | '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1); | ||
| 2306 | break; | ||
| 2307 | } | ||
| 2308 | case OpCode::Id::LOP_C: | ||
| 2309 | case OpCode::Id::LOP_R: | ||
| 2310 | case OpCode::Id::LOP_IMM: { | ||
| 2311 | |||
| 2312 | if (instr.alu.lop.invert_a) | ||
| 2313 | op_a = "~(" + op_a + ')'; | ||
| 2314 | |||
| 2315 | if (instr.alu.lop.invert_b) | ||
| 2316 | op_b = "~(" + op_b + ')'; | ||
| 2317 | |||
| 2318 | WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 2319 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 2320 | instr.generates_cc); | ||
| 2321 | break; | ||
| 2322 | } | ||
| 2323 | case OpCode::Id::LOP3_C: | ||
| 2324 | case OpCode::Id::LOP3_R: | ||
| 2325 | case OpCode::Id::LOP3_IMM: { | ||
| 2326 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2327 | std::string lut; | ||
| 2328 | |||
| 2329 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 2330 | lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')'; | ||
| 2331 | } else { | ||
| 2332 | lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; | ||
| 2333 | } | ||
| 2334 | |||
| 2335 | WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 2336 | break; | ||
| 2337 | } | ||
| 2338 | case OpCode::Id::IMNMX_C: | ||
| 2339 | case OpCode::Id::IMNMX_R: | ||
| 2340 | case OpCode::Id::IMNMX_IMM: { | ||
| 2341 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 2342 | UNIMPLEMENTED_IF_MSG( | ||
| 2343 | instr.generates_cc, | ||
| 2344 | "Condition codes generation in IMNMX is partially implemented"); | ||
| 2345 | |||
| 2346 | const std::string condition = | ||
| 2347 | GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 2348 | const std::string parameters = op_a + ',' + op_b; | ||
| 2349 | regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, | ||
| 2350 | '(' + condition + ") ? min(" + parameters + ") : max(" + | ||
| 2351 | parameters + ')', | ||
| 2352 | 1, 1, false, instr.generates_cc); | ||
| 2353 | break; | ||
| 2354 | } | ||
| 2355 | case OpCode::Id::LEA_R2: | ||
| 2356 | case OpCode::Id::LEA_R1: | ||
| 2357 | case OpCode::Id::LEA_IMM: | ||
| 2358 | case OpCode::Id::LEA_RZ: | ||
| 2359 | case OpCode::Id::LEA_HI: { | ||
| 2360 | std::string op_c; | ||
| 2361 | |||
| 2362 | switch (opcode->get().GetId()) { | ||
| 2363 | case OpCode::Id::LEA_R2: { | ||
| 2364 | op_a = regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2365 | op_b = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2366 | op_c = std::to_string(instr.lea.r2.entry_a); | ||
| 2367 | break; | ||
| 2368 | } | ||
| 2369 | |||
| 2370 | case OpCode::Id::LEA_R1: { | ||
| 2371 | const bool neg = instr.lea.r1.neg != 0; | ||
| 2372 | op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2373 | if (neg) | ||
| 2374 | op_a = "-(" + op_a + ')'; | ||
| 2375 | op_b = regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2376 | op_c = std::to_string(instr.lea.r1.entry_a); | ||
| 2377 | break; | ||
| 2378 | } | ||
| 2379 | |||
| 2380 | case OpCode::Id::LEA_IMM: { | ||
| 2381 | const bool neg = instr.lea.imm.neg != 0; | ||
| 2382 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2383 | if (neg) | ||
| 2384 | op_b = "-(" + op_b + ')'; | ||
| 2385 | op_a = std::to_string(instr.lea.imm.entry_a); | ||
| 2386 | op_c = std::to_string(instr.lea.imm.entry_b); | ||
| 2387 | break; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | case OpCode::Id::LEA_RZ: { | ||
| 2391 | const bool neg = instr.lea.rz.neg != 0; | ||
| 2392 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2393 | if (neg) | ||
| 2394 | op_b = "-(" + op_b + ')'; | ||
| 2395 | op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset, | ||
| 2396 | GLSLRegister::Type::Integer); | ||
| 2397 | op_c = std::to_string(instr.lea.rz.entry_a); | ||
| 2398 | |||
| 2399 | break; | ||
| 2400 | } | ||
| 2401 | 738 | ||
| 2402 | case OpCode::Id::LEA_HI: | 739 | std::string Assign(Operation operation) { |
| 2403 | default: { | 740 | const Node dest = operation[0]; |
| 2404 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | 741 | const Node src = operation[1]; |
| 2405 | op_a = std::to_string(instr.lea.imm.entry_a); | ||
| 2406 | op_c = std::to_string(instr.lea.imm.entry_b); | ||
| 2407 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 2408 | } | ||
| 2409 | } | ||
| 2410 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 2411 | "Unhandled LEA Predicate"); | ||
| 2412 | const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; | ||
| 2413 | regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false, | ||
| 2414 | instr.generates_cc); | ||
| 2415 | 742 | ||
| 2416 | break; | 743 | std::string target; |
| 2417 | } | 744 | if (const auto gpr = std::get_if<GprNode>(dest)) { |
| 2418 | default: { | 745 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 2419 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", | 746 | // Writing to Register::ZeroIndex is a no op |
| 2420 | opcode->get().GetName()); | 747 | return {}; |
| 2421 | } | ||
| 2422 | } | 748 | } |
| 749 | target = GetRegister(gpr->GetIndex()); | ||
| 2423 | 750 | ||
| 2424 | break; | 751 | } else if (const auto abuf = std::get_if<AbufNode>(dest)) { |
| 2425 | } | 752 | target = [&]() -> std::string { |
| 2426 | case OpCode::Type::ArithmeticHalf: { | 753 | switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { |
| 2427 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | 754 | case Attribute::Index::Position: |
| 2428 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | 755 | return "position" + GetSwizzle(abuf->GetElement()); |
| 2429 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | 756 | case Attribute::Index::PointSize: |
| 2430 | } | 757 | return "gl_PointSize"; |
| 2431 | const bool negate_a = | 758 | case Attribute::Index::ClipDistances0123: |
| 2432 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | 759 | return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']'; |
| 2433 | const bool negate_b = | 760 | case Attribute::Index::ClipDistances4567: |
| 2434 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | 761 | return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']'; |
| 2435 | |||
| 2436 | const std::string op_a = | ||
| 2437 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a, | ||
| 2438 | instr.alu_half.abs_a != 0, negate_a); | ||
| 2439 | |||
| 2440 | std::string op_b; | ||
| 2441 | switch (opcode->get().GetId()) { | ||
| 2442 | case OpCode::Id::HADD2_C: | ||
| 2443 | case OpCode::Id::HMUL2_C: | ||
| 2444 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2445 | GLSLRegister::Type::UnsignedInteger); | ||
| 2446 | break; | ||
| 2447 | case OpCode::Id::HADD2_R: | ||
| 2448 | case OpCode::Id::HMUL2_R: | ||
| 2449 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false); | ||
| 2450 | break; | ||
| 2451 | default: | ||
| 2452 | UNREACHABLE(); | ||
| 2453 | op_b = "0"; | ||
| 2454 | break; | ||
| 2455 | } | ||
| 2456 | op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b); | ||
| 2457 | |||
| 2458 | const std::string result = [&]() { | ||
| 2459 | switch (opcode->get().GetId()) { | ||
| 2460 | case OpCode::Id::HADD2_C: | ||
| 2461 | case OpCode::Id::HADD2_R: | ||
| 2462 | return '(' + op_a + " + " + op_b + ')'; | ||
| 2463 | case OpCode::Id::HMUL2_C: | ||
| 2464 | case OpCode::Id::HMUL2_R: | ||
| 2465 | return '(' + op_a + " * " + op_b + ')'; | ||
| 2466 | default: | 762 | default: |
| 2467 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", | 763 | if (attribute >= Attribute::Index::Attribute_0 && |
| 2468 | opcode->get().GetName()); | 764 | attribute <= Attribute::Index::Attribute_31) { |
| 2469 | return std::string("0"); | 765 | return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); |
| 2470 | } | 766 | } |
| 2471 | }(); | 767 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", |
| 2472 | 768 | static_cast<u32>(attribute)); | |
| 2473 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1, | 769 | return "0"; |
| 2474 | instr.alu_half.saturate != 0); | ||
| 2475 | break; | ||
| 2476 | } | ||
| 2477 | case OpCode::Type::ArithmeticHalfImmediate: { | ||
| 2478 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 2479 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | ||
| 2480 | } else { | ||
| 2481 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != | ||
| 2482 | Tegra::Shader::HalfPrecision::None); | ||
| 2483 | } | ||
| 2484 | |||
| 2485 | const std::string op_a = GetHalfFloat( | ||
| 2486 | regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a, | ||
| 2487 | instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0); | ||
| 2488 | |||
| 2489 | const std::string op_b = UnpackHalfImmediate(instr, true); | ||
| 2490 | |||
| 2491 | const std::string result = [&]() { | ||
| 2492 | switch (opcode->get().GetId()) { | ||
| 2493 | case OpCode::Id::HADD2_IMM: | ||
| 2494 | return op_a + " + " + op_b; | ||
| 2495 | case OpCode::Id::HMUL2_IMM: | ||
| 2496 | return op_a + " * " + op_b; | ||
| 2497 | default: | ||
| 2498 | UNREACHABLE(); | ||
| 2499 | return std::string("0"); | ||
| 2500 | } | 770 | } |
| 2501 | }(); | 771 | }(); |
| 2502 | 772 | ||
| 2503 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1, | 773 | } else if (const auto lmem = std::get_if<LmemNode>(dest)) { |
| 2504 | instr.alu_half_imm.saturate != 0); | 774 | target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; |
| 2505 | break; | ||
| 2506 | } | ||
| 2507 | case OpCode::Type::Ffma: { | ||
| 2508 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2509 | std::string op_b = instr.ffma.negate_b ? "-" : ""; | ||
| 2510 | std::string op_c = instr.ffma.negate_c ? "-" : ""; | ||
| 2511 | |||
| 2512 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 2513 | UNIMPLEMENTED_IF_MSG( | ||
| 2514 | instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | ||
| 2515 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | ||
| 2516 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | ||
| 2517 | instr.ffma.tab5980_1.Value()); | ||
| 2518 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2519 | "Condition codes generation in FFMA is partially implemented"); | ||
| 2520 | |||
| 2521 | switch (opcode->get().GetId()) { | ||
| 2522 | case OpCode::Id::FFMA_CR: { | ||
| 2523 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2524 | GLSLRegister::Type::Float); | ||
| 2525 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2526 | break; | ||
| 2527 | } | ||
| 2528 | case OpCode::Id::FFMA_RR: { | ||
| 2529 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2530 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2531 | break; | ||
| 2532 | } | ||
| 2533 | case OpCode::Id::FFMA_RC: { | ||
| 2534 | op_b += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2535 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2536 | GLSLRegister::Type::Float); | ||
| 2537 | break; | ||
| 2538 | } | ||
| 2539 | case OpCode::Id::FFMA_IMM: { | ||
| 2540 | op_b += GetImmediate19(instr); | ||
| 2541 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2542 | break; | ||
| 2543 | } | ||
| 2544 | default: { | ||
| 2545 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 2546 | } | ||
| 2547 | } | ||
| 2548 | 775 | ||
| 2549 | regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', | 776 | } else { |
| 2550 | 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true); | 777 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 2551 | break; | ||
| 2552 | } | 778 | } |
| 2553 | case OpCode::Type::Hfma2: { | ||
| 2554 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 2555 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None); | ||
| 2556 | } else { | ||
| 2557 | UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None); | ||
| 2558 | } | ||
| 2559 | const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR | ||
| 2560 | ? instr.hfma2.rr.saturate != 0 | ||
| 2561 | : instr.hfma2.saturate != 0; | ||
| 2562 | |||
| 2563 | const std::string op_a = | ||
| 2564 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); | ||
| 2565 | std::string op_b, op_c; | ||
| 2566 | |||
| 2567 | switch (opcode->get().GetId()) { | ||
| 2568 | case OpCode::Id::HFMA2_CR: | ||
| 2569 | op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2570 | GLSLRegister::Type::UnsignedInteger), | ||
| 2571 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2572 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2573 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2574 | break; | ||
| 2575 | case OpCode::Id::HFMA2_RC: | ||
| 2576 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2577 | instr.hfma2.type_reg39, false, instr.hfma2.negate_b); | ||
| 2578 | op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2579 | GLSLRegister::Type::UnsignedInteger), | ||
| 2580 | instr.hfma2.type_b, false, instr.hfma2.negate_c); | ||
| 2581 | break; | ||
| 2582 | case OpCode::Id::HFMA2_RR: | ||
| 2583 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 2584 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2585 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2586 | instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c); | ||
| 2587 | break; | ||
| 2588 | case OpCode::Id::HFMA2_IMM_R: | ||
| 2589 | op_b = UnpackHalfImmediate(instr, true); | ||
| 2590 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2591 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2592 | break; | ||
| 2593 | default: | ||
| 2594 | UNREACHABLE(); | ||
| 2595 | op_c = op_b = "vec2(0)"; | ||
| 2596 | break; | ||
| 2597 | } | ||
| 2598 | 779 | ||
| 2599 | const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | 780 | code.AddLine(target + " = " + Visit(src) + ';'); |
| 781 | return {}; | ||
| 782 | } | ||
| 2600 | 783 | ||
| 2601 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); | 784 | std::string Composite(Operation operation) { |
| 2602 | break; | 785 | std::string value = "vec4("; |
| 786 | for (std::size_t i = 0; i < 4; ++i) { | ||
| 787 | value += Visit(operation[i]); | ||
| 788 | if (i < 3) | ||
| 789 | value += ", "; | ||
| 2603 | } | 790 | } |
| 2604 | case OpCode::Type::Conversion: { | 791 | value += ')'; |
| 2605 | switch (opcode->get().GetId()) { | 792 | return value; |
| 2606 | case OpCode::Id::I2I_R: { | 793 | } |
| 2607 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 2608 | |||
| 2609 | std::string op_a = regs.GetRegisterAsInteger( | ||
| 2610 | instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); | ||
| 2611 | 794 | ||
| 2612 | if (instr.conversion.abs_a) { | 795 | template <Type type> |
| 2613 | op_a = "abs(" + op_a + ')'; | 796 | std::string Add(Operation operation) { |
| 2614 | } | 797 | return GenerateBinaryInfix(operation, "+", type, type, type); |
| 798 | } | ||
| 2615 | 799 | ||
| 2616 | if (instr.conversion.negate_a) { | 800 | template <Type type> |
| 2617 | op_a = "-(" + op_a + ')'; | 801 | std::string Mul(Operation operation) { |
| 2618 | } | 802 | return GenerateBinaryInfix(operation, "*", type, type, type); |
| 803 | } | ||
| 2619 | 804 | ||
| 2620 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | 805 | template <Type type> |
| 2621 | 1, instr.alu.saturate_d, instr.generates_cc, 0, | 806 | std::string Div(Operation operation) { |
| 2622 | instr.conversion.dest_size); | 807 | return GenerateBinaryInfix(operation, "/", type, type, type); |
| 2623 | break; | 808 | } |
| 2624 | } | ||
| 2625 | case OpCode::Id::I2F_R: | ||
| 2626 | case OpCode::Id::I2F_C: { | ||
| 2627 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 2628 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 2629 | std::string op_a; | ||
| 2630 | |||
| 2631 | if (instr.is_b_gpr) { | ||
| 2632 | op_a = | ||
| 2633 | regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed, | ||
| 2634 | instr.conversion.src_size); | ||
| 2635 | } else { | ||
| 2636 | op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2637 | instr.conversion.is_input_signed | ||
| 2638 | ? GLSLRegister::Type::Integer | ||
| 2639 | : GLSLRegister::Type::UnsignedInteger, | ||
| 2640 | instr.conversion.src_size); | ||
| 2641 | } | ||
| 2642 | 809 | ||
| 2643 | if (instr.conversion.abs_a) { | 810 | template <Type type> |
| 2644 | op_a = "abs(" + op_a + ')'; | 811 | std::string Fma(Operation operation) { |
| 2645 | } | 812 | return GenerateTernary(operation, "fma", type, type, type, type); |
| 813 | } | ||
| 2646 | 814 | ||
| 2647 | if (instr.conversion.negate_a) { | 815 | template <Type type> |
| 2648 | op_a = "-(" + op_a + ')'; | 816 | std::string Negate(Operation operation) { |
| 2649 | } | 817 | return GenerateUnary(operation, "-", type, type, true); |
| 818 | } | ||
| 2650 | 819 | ||
| 2651 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc); | 820 | template <Type type> |
| 2652 | break; | 821 | std::string Absolute(Operation operation) { |
| 2653 | } | 822 | return GenerateUnary(operation, "abs", type, type, false); |
| 2654 | case OpCode::Id::F2F_R: { | 823 | } |
| 2655 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 2656 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||
| 2657 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2658 | 824 | ||
| 2659 | if (instr.conversion.abs_a) { | 825 | std::string FClamp(Operation operation) { |
| 2660 | op_a = "abs(" + op_a + ')'; | 826 | return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, |
| 2661 | } | 827 | Type::Float); |
| 828 | } | ||
| 2662 | 829 | ||
| 2663 | if (instr.conversion.negate_a) { | 830 | template <Type type> |
| 2664 | op_a = "-(" + op_a + ')'; | 831 | std::string Min(Operation operation) { |
| 2665 | } | 832 | return GenerateBinaryCall(operation, "min", type, type, type); |
| 833 | } | ||
| 2666 | 834 | ||
| 2667 | switch (instr.conversion.f2f.rounding) { | 835 | template <Type type> |
| 2668 | case Tegra::Shader::F2fRoundingOp::None: | 836 | std::string Max(Operation operation) { |
| 2669 | break; | 837 | return GenerateBinaryCall(operation, "max", type, type, type); |
| 2670 | case Tegra::Shader::F2fRoundingOp::Round: | 838 | } |
| 2671 | op_a = "roundEven(" + op_a + ')'; | ||
| 2672 | break; | ||
| 2673 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 2674 | op_a = "floor(" + op_a + ')'; | ||
| 2675 | break; | ||
| 2676 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 2677 | op_a = "ceil(" + op_a + ')'; | ||
| 2678 | break; | ||
| 2679 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 2680 | op_a = "trunc(" + op_a + ')'; | ||
| 2681 | break; | ||
| 2682 | default: | ||
| 2683 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 2684 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | ||
| 2685 | break; | ||
| 2686 | } | ||
| 2687 | 839 | ||
| 2688 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d, | 840 | std::string Select(Operation operation) { |
| 2689 | instr.generates_cc); | 841 | const std::string condition = Visit(operation[0]); |
| 2690 | break; | 842 | const std::string true_case = Visit(operation[1]); |
| 2691 | } | 843 | const std::string false_case = Visit(operation[2]); |
| 2692 | case OpCode::Id::F2I_R: | 844 | return ApplyPrecise(operation, |
| 2693 | case OpCode::Id::F2I_C: { | 845 | '(' + condition + " ? " + true_case + " : " + false_case + ')'); |
| 2694 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 846 | } |
| 2695 | std::string op_a{}; | ||
| 2696 | 847 | ||
| 2697 | if (instr.is_b_gpr) { | 848 | std::string FCos(Operation operation) { |
| 2698 | op_a = regs.GetRegisterAsFloat(instr.gpr20); | 849 | return GenerateUnary(operation, "cos", Type::Float, Type::Float, false); |
| 2699 | } else { | 850 | } |
| 2700 | op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2701 | GLSLRegister::Type::Float); | ||
| 2702 | } | ||
| 2703 | 851 | ||
| 2704 | if (instr.conversion.abs_a) { | 852 | std::string FSin(Operation operation) { |
| 2705 | op_a = "abs(" + op_a + ')'; | 853 | return GenerateUnary(operation, "sin", Type::Float, Type::Float, false); |
| 2706 | } | 854 | } |
| 2707 | 855 | ||
| 2708 | if (instr.conversion.negate_a) { | 856 | std::string FExp2(Operation operation) { |
| 2709 | op_a = "-(" + op_a + ')'; | 857 | return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false); |
| 2710 | } | 858 | } |
| 2711 | 859 | ||
| 2712 | switch (instr.conversion.f2i.rounding) { | 860 | std::string FLog2(Operation operation) { |
| 2713 | case Tegra::Shader::F2iRoundingOp::None: | 861 | return GenerateUnary(operation, "log2", Type::Float, Type::Float, false); |
| 2714 | break; | 862 | } |
| 2715 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 2716 | op_a = "floor(" + op_a + ')'; | ||
| 2717 | break; | ||
| 2718 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 2719 | op_a = "ceil(" + op_a + ')'; | ||
| 2720 | break; | ||
| 2721 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 2722 | op_a = "trunc(" + op_a + ')'; | ||
| 2723 | break; | ||
| 2724 | default: | ||
| 2725 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 2726 | static_cast<u32>(instr.conversion.f2i.rounding.Value())); | ||
| 2727 | break; | ||
| 2728 | } | ||
| 2729 | 863 | ||
| 2730 | if (instr.conversion.is_output_signed) { | 864 | std::string FInverseSqrt(Operation operation) { |
| 2731 | op_a = "int(" + op_a + ')'; | 865 | return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false); |
| 2732 | } else { | 866 | } |
| 2733 | op_a = "uint(" + op_a + ')'; | ||
| 2734 | } | ||
| 2735 | 867 | ||
| 2736 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | 868 | std::string FSqrt(Operation operation) { |
| 2737 | 1, false, instr.generates_cc, 0, | 869 | return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false); |
| 2738 | instr.conversion.dest_size); | 870 | } |
| 2739 | break; | ||
| 2740 | } | ||
| 2741 | default: { | ||
| 2742 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 2743 | } | ||
| 2744 | } | ||
| 2745 | break; | ||
| 2746 | } | ||
| 2747 | case OpCode::Type::Memory: { | ||
| 2748 | switch (opcode->get().GetId()) { | ||
| 2749 | case OpCode::Id::LD_A: { | ||
| 2750 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 2751 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 2752 | "Indirect attribute loads are not supported"); | ||
| 2753 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 2754 | "Unaligned attribute loads are not supported"); | ||
| 2755 | |||
| 2756 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 2757 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 2758 | |||
| 2759 | u64 next_element = instr.attribute.fmt20.element; | ||
| 2760 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 2761 | |||
| 2762 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 2763 | regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, | ||
| 2764 | static_cast<Attribute::Index>(next_index), | ||
| 2765 | input_mode, instr.gpr39.Value()); | ||
| 2766 | |||
| 2767 | // Load the next attribute element into the following register. If the element | ||
| 2768 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 2769 | // attribute. | ||
| 2770 | next_element = (next_element + 1) % 4; | ||
| 2771 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 2772 | }; | ||
| 2773 | |||
| 2774 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 2775 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 2776 | LoadNextElement(reg_offset); | ||
| 2777 | } | ||
| 2778 | break; | ||
| 2779 | } | ||
| 2780 | case OpCode::Id::LD_C: { | ||
| 2781 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 2782 | |||
| 2783 | const auto scope = shader.Scope(); | ||
| 2784 | |||
| 2785 | shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 2786 | " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); | ||
| 2787 | |||
| 2788 | const std::string op_a = | ||
| 2789 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", | ||
| 2790 | GLSLRegister::Type::Float); | ||
| 2791 | |||
| 2792 | switch (instr.ld_c.type.Value()) { | ||
| 2793 | case Tegra::Shader::UniformType::Single: | ||
| 2794 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2795 | break; | ||
| 2796 | |||
| 2797 | case Tegra::Shader::UniformType::Double: { | ||
| 2798 | const std::string op_b = | ||
| 2799 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, | ||
| 2800 | "index", GLSLRegister::Type::Float); | ||
| 2801 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2802 | regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); | ||
| 2803 | break; | ||
| 2804 | } | ||
| 2805 | default: | ||
| 2806 | UNIMPLEMENTED_MSG("Unhandled type: {}", | ||
| 2807 | static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 2808 | } | ||
| 2809 | break; | ||
| 2810 | } | ||
| 2811 | case OpCode::Id::LD_L: { | ||
| 2812 | UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", | ||
| 2813 | static_cast<unsigned>(instr.ld_l.unknown.Value())); | ||
| 2814 | 871 | ||
| 2815 | const auto scope = shader.Scope(); | 872 | std::string FRoundEven(Operation operation) { |
| 873 | return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false); | ||
| 874 | } | ||
| 2816 | 875 | ||
| 2817 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | 876 | std::string FFloor(Operation operation) { |
| 2818 | std::to_string(instr.smem_imm.Value()) + ')'; | 877 | return GenerateUnary(operation, "floor", Type::Float, Type::Float, false); |
| 878 | } | ||
| 2819 | 879 | ||
| 2820 | shader.AddLine("uint index = (" + op + " / 4);"); | 880 | std::string FCeil(Operation operation) { |
| 881 | return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false); | ||
| 882 | } | ||
| 2821 | 883 | ||
| 2822 | const std::string op_a = regs.GetLocalMemoryAsFloat("index"); | 884 | std::string FTrunc(Operation operation) { |
| 885 | return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false); | ||
| 886 | } | ||
| 2823 | 887 | ||
| 2824 | switch (instr.ldst_sl.type.Value()) { | 888 | template <Type type> |
| 2825 | case Tegra::Shader::StoreType::Bytes32: | 889 | std::string FCastInteger(Operation operation) { |
| 2826 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | 890 | return GenerateUnary(operation, "float", Type::Float, type, false); |
| 2827 | break; | 891 | } |
| 2828 | default: | ||
| 2829 | UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", | ||
| 2830 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2831 | } | ||
| 2832 | break; | ||
| 2833 | } | ||
| 2834 | case OpCode::Id::ST_A: { | ||
| 2835 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 2836 | "Indirect attribute loads are not supported"); | ||
| 2837 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 2838 | "Unaligned attribute loads are not supported"); | ||
| 2839 | |||
| 2840 | u64 next_element = instr.attribute.fmt20.element; | ||
| 2841 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 2842 | |||
| 2843 | const auto StoreNextElement = [&](u32 reg_offset) { | ||
| 2844 | regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), | ||
| 2845 | next_element, instr.gpr0.Value() + reg_offset, | ||
| 2846 | instr.gpr39.Value()); | ||
| 2847 | |||
| 2848 | // Load the next attribute element into the following register. If the element | ||
| 2849 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 2850 | // attribute. | ||
| 2851 | next_element = (next_element + 1) % 4; | ||
| 2852 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 2853 | }; | ||
| 2854 | |||
| 2855 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 2856 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 2857 | StoreNextElement(reg_offset); | ||
| 2858 | } | ||
| 2859 | 892 | ||
| 2860 | break; | 893 | std::string ICastFloat(Operation operation) { |
| 2861 | } | 894 | return GenerateUnary(operation, "int", Type::Int, Type::Float, false); |
| 2862 | case OpCode::Id::ST_L: { | 895 | } |
| 2863 | UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", | ||
| 2864 | static_cast<unsigned>(instr.st_l.unknown.Value())); | ||
| 2865 | 896 | ||
| 2866 | const auto scope = shader.Scope(); | 897 | std::string ICastUnsigned(Operation operation) { |
| 898 | return GenerateUnary(operation, "int", Type::Int, Type::Uint, false); | ||
| 899 | } | ||
| 2867 | 900 | ||
| 2868 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | 901 | template <Type type> |
| 2869 | std::to_string(instr.smem_imm.Value()) + ')'; | 902 | std::string LogicalShiftLeft(Operation operation) { |
| 903 | return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); | ||
| 904 | } | ||
| 2870 | 905 | ||
| 2871 | shader.AddLine("uint index = (" + op + " / 4);"); | 906 | std::string ILogicalShiftRight(Operation operation) { |
| 907 | const std::string op_a = VisitOperand(operation, 0, Type::Uint); | ||
| 908 | const std::string op_b = VisitOperand(operation, 1, Type::Uint); | ||
| 2872 | 909 | ||
| 2873 | switch (instr.ldst_sl.type.Value()) { | 910 | return ApplyPrecise(operation, |
| 2874 | case Tegra::Shader::StoreType::Bytes32: | 911 | BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int)); |
| 2875 | regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); | 912 | } |
| 2876 | break; | ||
| 2877 | default: | ||
| 2878 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | ||
| 2879 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2880 | } | ||
| 2881 | break; | ||
| 2882 | } | ||
| 2883 | case OpCode::Id::TEX: { | ||
| 2884 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | ||
| 2885 | const bool is_array = instr.tex.array != 0; | ||
| 2886 | const bool depth_compare = | ||
| 2887 | instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2888 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 2889 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2890 | "NODEP is not implemented"); | ||
| 2891 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2892 | "AOFFI is not implemented"); | ||
| 2893 | |||
| 2894 | const auto [coord, texture] = | ||
| 2895 | GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 2896 | |||
| 2897 | const auto scope = shader.Scope(); | ||
| 2898 | shader.AddLine(coord); | ||
| 2899 | |||
| 2900 | if (depth_compare) { | ||
| 2901 | regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1); | ||
| 2902 | } else { | ||
| 2903 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2904 | std::size_t dest_elem{}; | ||
| 2905 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 2906 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 2907 | // Skip disabled components | ||
| 2908 | continue; | ||
| 2909 | } | ||
| 2910 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, | ||
| 2911 | dest_elem); | ||
| 2912 | ++dest_elem; | ||
| 2913 | } | ||
| 2914 | } | ||
| 2915 | break; | ||
| 2916 | } | ||
| 2917 | case OpCode::Id::TEXS: { | ||
| 2918 | Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 2919 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 2920 | const bool depth_compare = | ||
| 2921 | instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2922 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 2923 | 913 | ||
| 2924 | UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | 914 | std::string IArithmeticShiftRight(Operation operation) { |
| 2925 | "NODEP is not implemented"); | 915 | return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); |
| 916 | } | ||
| 2926 | 917 | ||
| 2927 | const auto scope = shader.Scope(); | 918 | template <Type type> |
| 919 | std::string BitwiseAnd(Operation operation) { | ||
| 920 | return GenerateBinaryInfix(operation, "&", type, type, type); | ||
| 921 | } | ||
| 2928 | 922 | ||
| 2929 | auto [coord, texture] = | 923 | template <Type type> |
| 2930 | GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); | 924 | std::string BitwiseOr(Operation operation) { |
| 925 | return GenerateBinaryInfix(operation, "|", type, type, type); | ||
| 926 | } | ||
| 2931 | 927 | ||
| 2932 | shader.AddLine(coord); | 928 | template <Type type> |
| 929 | std::string BitwiseXor(Operation operation) { | ||
| 930 | return GenerateBinaryInfix(operation, "^", type, type, type); | ||
| 931 | } | ||
| 2933 | 932 | ||
| 2934 | if (depth_compare) { | 933 | template <Type type> |
| 2935 | texture = "vec4(" + texture + ')'; | 934 | std::string BitwiseNot(Operation operation) { |
| 2936 | } | 935 | return GenerateUnary(operation, "~", type, type, false); |
| 2937 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | 936 | } |
| 2938 | 937 | ||
| 2939 | if (instr.texs.fp32_flag) { | 938 | std::string UCastFloat(Operation operation) { |
| 2940 | WriteTexsInstructionFloat(instr, "texture_tmp"); | 939 | return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false); |
| 2941 | } else { | 940 | } |
| 2942 | WriteTexsInstructionHalfFloat(instr, "texture_tmp"); | ||
| 2943 | } | ||
| 2944 | break; | ||
| 2945 | } | ||
| 2946 | case OpCode::Id::TLDS: { | ||
| 2947 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 2948 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 2949 | 941 | ||
| 2950 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | 942 | std::string UCastSigned(Operation operation) { |
| 2951 | "NODEP is not implemented"); | 943 | return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false); |
| 2952 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | 944 | } |
| 2953 | "AOFFI is not implemented"); | ||
| 2954 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), | ||
| 2955 | "MZ is not implemented"); | ||
| 2956 | 945 | ||
| 2957 | const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array); | 946 | std::string UShiftRight(Operation operation) { |
| 947 | return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); | ||
| 948 | } | ||
| 2958 | 949 | ||
| 2959 | const auto scope = shader.Scope(); | 950 | template <Type type> |
| 951 | std::string BitfieldInsert(Operation operation) { | ||
| 952 | return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, | ||
| 953 | Type::Int); | ||
| 954 | } | ||
| 2960 | 955 | ||
| 2961 | shader.AddLine(coord); | 956 | template <Type type> |
| 2962 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | 957 | std::string BitfieldExtract(Operation operation) { |
| 2963 | WriteTexsInstructionFloat(instr, "texture_tmp"); | 958 | return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); |
| 2964 | break; | 959 | } |
| 2965 | } | ||
| 2966 | case OpCode::Id::TLD4: { | ||
| 2967 | |||
| 2968 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2969 | "NODEP is not implemented"); | ||
| 2970 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2971 | "AOFFI is not implemented"); | ||
| 2972 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 2973 | "NDV is not implemented"); | ||
| 2974 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), | ||
| 2975 | "PTP is not implemented"); | ||
| 2976 | |||
| 2977 | auto texture_type = instr.tld4.texture_type.Value(); | ||
| 2978 | const bool depth_compare = | ||
| 2979 | instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2980 | const bool is_array = instr.tld4.array != 0; | ||
| 2981 | |||
| 2982 | const auto [coord, texture] = | ||
| 2983 | GetTLD4Code(instr, texture_type, depth_compare, is_array); | ||
| 2984 | |||
| 2985 | const auto scope = shader.Scope(); | ||
| 2986 | |||
| 2987 | shader.AddLine(coord); | ||
| 2988 | std::size_t dest_elem{}; | ||
| 2989 | |||
| 2990 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2991 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 2992 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 2993 | // Skip disabled components | ||
| 2994 | continue; | ||
| 2995 | } | ||
| 2996 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, | ||
| 2997 | dest_elem); | ||
| 2998 | ++dest_elem; | ||
| 2999 | } | ||
| 3000 | break; | ||
| 3001 | } | ||
| 3002 | case OpCode::Id::TLD4S: { | ||
| 3003 | UNIMPLEMENTED_IF_MSG( | ||
| 3004 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3005 | "NODEP is not implemented"); | ||
| 3006 | UNIMPLEMENTED_IF_MSG( | ||
| 3007 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 3008 | "AOFFI is not implemented"); | ||
| 3009 | 960 | ||
| 3010 | const auto scope = shader.Scope(); | 961 | template <Type type> |
| 962 | std::string BitCount(Operation operation) { | ||
| 963 | return GenerateUnary(operation, "bitCount", type, type, false); | ||
| 964 | } | ||
| 3011 | 965 | ||
| 3012 | std::string coords; | 966 | std::string HNegate(Operation operation) { |
| 967 | const auto GetNegate = [&](std::size_t index) -> std::string { | ||
| 968 | return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; | ||
| 969 | }; | ||
| 970 | const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" + | ||
| 971 | GetNegate(1) + ", " + GetNegate(2) + "))"; | ||
| 972 | return BitwiseCastResult(value, Type::HalfFloat); | ||
| 973 | } | ||
| 3013 | 974 | ||
| 3014 | const bool depth_compare = | 975 | std::string HMergeF32(Operation operation) { |
| 3015 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | 976 | return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; |
| 977 | } | ||
| 3016 | 978 | ||
| 3017 | const std::string sampler = GetSampler( | 979 | std::string HMergeH0(Operation operation) { |
| 3018 | instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); | 980 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + |
| 981 | Visit(operation[1]) + ")[0]))"; | ||
| 982 | } | ||
| 3019 | 983 | ||
| 3020 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 984 | std::string HMergeH1(Operation operation) { |
| 3021 | coords = "vec2 coords = vec2(" + op_a + ", "; | 985 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" + |
| 3022 | std::string texture = "textureGather(" + sampler + ", coords, "; | 986 | Visit(operation[1]) + ")[1]))"; |
| 987 | } | ||
| 3023 | 988 | ||
| 3024 | if (!depth_compare) { | 989 | std::string HPack2(Operation operation) { |
| 3025 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); | 990 | return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))"; |
| 3026 | coords += op_b + ");"; | 991 | } |
| 3027 | texture += std::to_string(instr.tld4s.component) + ')'; | ||
| 3028 | } else { | ||
| 3029 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3030 | const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3031 | coords += op_b + ");"; | ||
| 3032 | texture += op_c + ')'; | ||
| 3033 | } | ||
| 3034 | shader.AddLine(coords); | ||
| 3035 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 3036 | WriteTexsInstructionFloat(instr, "texture_tmp"); | ||
| 3037 | break; | ||
| 3038 | } | ||
| 3039 | case OpCode::Id::TXQ: { | ||
| 3040 | UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3041 | "NODEP is not implemented"); | ||
| 3042 | |||
| 3043 | const auto scope = shader.Scope(); | ||
| 3044 | |||
| 3045 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 3046 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 3047 | // uses. This must be fixed at a later instance. | ||
| 3048 | const std::string sampler = | ||
| 3049 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 3050 | switch (instr.txq.query_type) { | ||
| 3051 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 3052 | const std::string texture = "textureSize(" + sampler + ", " + | ||
| 3053 | regs.GetRegisterAsInteger(instr.gpr8) + ')'; | ||
| 3054 | const std::string mip_level = "textureQueryLevels(" + sampler + ')'; | ||
| 3055 | shader.AddLine("ivec2 sizes = " + texture + ';'); | ||
| 3056 | |||
| 3057 | regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1); | ||
| 3058 | regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1); | ||
| 3059 | regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1); | ||
| 3060 | regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1); | ||
| 3061 | break; | ||
| 3062 | } | ||
| 3063 | default: { | ||
| 3064 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 3065 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 3066 | } | ||
| 3067 | } | ||
| 3068 | break; | ||
| 3069 | } | ||
| 3070 | case OpCode::Id::TMML: { | ||
| 3071 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3072 | "NODEP is not implemented"); | ||
| 3073 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 3074 | "NDV is not implemented"); | ||
| 3075 | |||
| 3076 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 3077 | const bool is_array = instr.tmml.array != 0; | ||
| 3078 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 3079 | const std::string sampler = | ||
| 3080 | GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 3081 | |||
| 3082 | const auto scope = shader.Scope(); | ||
| 3083 | |||
| 3084 | // TODO: Add coordinates for different samplers once other texture types are | ||
| 3085 | // implemented. | ||
| 3086 | switch (texture_type) { | ||
| 3087 | case Tegra::Shader::TextureType::Texture1D: { | ||
| 3088 | shader.AddLine("float coords = " + x + ';'); | ||
| 3089 | break; | ||
| 3090 | } | ||
| 3091 | case Tegra::Shader::TextureType::Texture2D: { | ||
| 3092 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3093 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | ||
| 3094 | break; | ||
| 3095 | } | ||
| 3096 | default: | ||
| 3097 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 3098 | 992 | ||
| 3099 | // Fallback to interpreting as a 2D texture for now | 993 | template <Type type> |
| 3100 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 994 | std::string LogicalLessThan(Operation operation) { |
| 3101 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | 995 | return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); |
| 3102 | texture_type = Tegra::Shader::TextureType::Texture2D; | 996 | } |
| 3103 | } | ||
| 3104 | 997 | ||
| 3105 | const std::string texture = "textureQueryLod(" + sampler + ", coords)"; | 998 | template <Type type> |
| 3106 | shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); | 999 | std::string LogicalEqual(Operation operation) { |
| 1000 | return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); | ||
| 1001 | } | ||
| 3107 | 1002 | ||
| 3108 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); | 1003 | template <Type type> |
| 3109 | regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); | 1004 | std::string LogicalLessEqual(Operation operation) { |
| 3110 | break; | 1005 | return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); |
| 3111 | } | 1006 | } |
| 3112 | default: { | ||
| 3113 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 3114 | } | ||
| 3115 | } | ||
| 3116 | break; | ||
| 3117 | } | ||
| 3118 | case OpCode::Type::FloatSetPredicate: { | ||
| 3119 | const std::string op_a = | ||
| 3120 | GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 3121 | instr.fsetp.neg_a != 0); | ||
| 3122 | 1007 | ||
| 3123 | std::string op_b; | 1008 | template <Type type> |
| 1009 | std::string LogicalGreaterThan(Operation operation) { | ||
| 1010 | return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); | ||
| 1011 | } | ||
| 3124 | 1012 | ||
| 3125 | if (instr.is_b_imm) { | 1013 | template <Type type> |
| 3126 | op_b += '(' + GetImmediate19(instr) + ')'; | 1014 | std::string LogicalNotEqual(Operation operation) { |
| 3127 | } else { | 1015 | return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); |
| 3128 | if (instr.is_b_gpr) { | 1016 | } |
| 3129 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3130 | } else { | ||
| 3131 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3132 | GLSLRegister::Type::Float); | ||
| 3133 | } | ||
| 3134 | } | ||
| 3135 | 1017 | ||
| 3136 | if (instr.fsetp.abs_b) { | 1018 | template <Type type> |
| 3137 | op_b = "abs(" + op_b + ')'; | 1019 | std::string LogicalGreaterEqual(Operation operation) { |
| 3138 | } | 1020 | return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); |
| 1021 | } | ||
| 3139 | 1022 | ||
| 3140 | // We can't use the constant predicate as destination. | 1023 | std::string LogicalFIsNan(Operation operation) { |
| 3141 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 1024 | return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false); |
| 1025 | } | ||
| 3142 | 1026 | ||
| 3143 | const std::string second_pred = | 1027 | std::string LogicalAssign(Operation operation) { |
| 3144 | GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | 1028 | const Node dest = operation[0]; |
| 1029 | const Node src = operation[1]; | ||
| 3145 | 1030 | ||
| 3146 | const std::string combiner = GetPredicateCombiner(instr.fsetp.op); | 1031 | std::string target; |
| 3147 | 1032 | ||
| 3148 | const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); | 1033 | if (const auto pred = std::get_if<PredicateNode>(dest)) { |
| 3149 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 1034 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); |
| 3150 | SetPredicate(instr.fsetp.pred3, | ||
| 3151 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3152 | 1035 | ||
| 3153 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1036 | const auto index = pred->GetIndex(); |
| 3154 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1037 | switch (index) { |
| 3155 | // if enabled | 1038 | case Tegra::Shader::Pred::NeverExecute: |
| 3156 | SetPredicate(instr.fsetp.pred0, | 1039 | case Tegra::Shader::Pred::UnusedIndex: |
| 3157 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | 1040 | // Writing to these predicates is a no-op |
| 1041 | return {}; | ||
| 3158 | } | 1042 | } |
| 3159 | break; | 1043 | target = GetPredicate(index); |
| 1044 | } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { | ||
| 1045 | target = GetInternalFlag(flag->GetFlag()); | ||
| 3160 | } | 1046 | } |
| 3161 | case OpCode::Type::IntegerSetPredicate: { | ||
| 3162 | const std::string op_a = | ||
| 3163 | regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); | ||
| 3164 | std::string op_b; | ||
| 3165 | 1047 | ||
| 3166 | if (instr.is_b_imm) { | 1048 | code.AddLine(target + " = " + Visit(src) + ';'); |
| 3167 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | 1049 | return {}; |
| 3168 | } else { | 1050 | } |
| 3169 | if (instr.is_b_gpr) { | ||
| 3170 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); | ||
| 3171 | } else { | ||
| 3172 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3173 | GLSLRegister::Type::Integer); | ||
| 3174 | } | ||
| 3175 | } | ||
| 3176 | |||
| 3177 | // We can't use the constant predicate as destination. | ||
| 3178 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3179 | 1051 | ||
| 3180 | const std::string second_pred = | 1052 | std::string LogicalAnd(Operation operation) { |
| 3181 | GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); | 1053 | return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); |
| 1054 | } | ||
| 3182 | 1055 | ||
| 3183 | const std::string combiner = GetPredicateCombiner(instr.isetp.op); | 1056 | std::string LogicalOr(Operation operation) { |
| 1057 | return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); | ||
| 1058 | } | ||
| 3184 | 1059 | ||
| 3185 | const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); | 1060 | std::string LogicalXor(Operation operation) { |
| 3186 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 1061 | return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); |
| 3187 | SetPredicate(instr.isetp.pred3, | 1062 | } |
| 3188 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3189 | 1063 | ||
| 3190 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1064 | std::string LogicalNegate(Operation operation) { |
| 3191 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1065 | return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false); |
| 3192 | // if enabled | 1066 | } |
| 3193 | SetPredicate(instr.isetp.pred0, | ||
| 3194 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3195 | } | ||
| 3196 | break; | ||
| 3197 | } | ||
| 3198 | case OpCode::Type::HalfSetPredicate: { | ||
| 3199 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | ||
| 3200 | |||
| 3201 | const std::string op_a = | ||
| 3202 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a, | ||
| 3203 | instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 3204 | |||
| 3205 | const std::string op_b = [&]() { | ||
| 3206 | switch (opcode->get().GetId()) { | ||
| 3207 | case OpCode::Id::HSETP2_R: | ||
| 3208 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3209 | instr.hsetp2.type_b, instr.hsetp2.abs_a, | ||
| 3210 | instr.hsetp2.negate_b); | ||
| 3211 | default: | ||
| 3212 | UNREACHABLE(); | ||
| 3213 | return std::string("vec2(0)"); | ||
| 3214 | } | ||
| 3215 | }(); | ||
| 3216 | 1067 | ||
| 3217 | // We can't use the constant predicate as destination. | 1068 | std::string LogicalPick2(Operation operation) { |
| 3218 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 1069 | const std::string pair = VisitOperand(operation, 0, Type::Bool2); |
| 1070 | return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']'; | ||
| 1071 | } | ||
| 3219 | 1072 | ||
| 3220 | const std::string second_pred = | 1073 | std::string LogicalAll2(Operation operation) { |
| 3221 | GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 1074 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1075 | } | ||
| 3222 | 1076 | ||
| 3223 | const std::string combiner = GetPredicateCombiner(instr.hsetp2.op); | 1077 | std::string LogicalAny2(Operation operation) { |
| 1078 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1079 | } | ||
| 3224 | 1080 | ||
| 3225 | const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||"; | 1081 | std::string Logical2HLessThan(Operation operation) { |
| 3226 | const std::string predicate = | 1082 | return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, |
| 3227 | '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' + | 1083 | Type::HalfFloat); |
| 3228 | component_combiner + ' ' + | 1084 | } |
| 3229 | GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')'; | ||
| 3230 | 1085 | ||
| 3231 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 1086 | std::string Logical2HEqual(Operation operation) { |
| 3232 | SetPredicate(instr.hsetp2.pred3, | 1087 | return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, |
| 3233 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | 1088 | Type::HalfFloat); |
| 1089 | } | ||
| 3234 | 1090 | ||
| 3235 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1091 | std::string Logical2HLessEqual(Operation operation) { |
| 3236 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1092 | return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, |
| 3237 | // if enabled | 1093 | Type::HalfFloat); |
| 3238 | SetPredicate(instr.hsetp2.pred0, | 1094 | } |
| 3239 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3240 | } | ||
| 3241 | break; | ||
| 3242 | } | ||
| 3243 | case OpCode::Type::PredicateSetRegister: { | ||
| 3244 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3245 | "Condition codes generation in PSET is partially implemented"); | ||
| 3246 | |||
| 3247 | const std::string op_a = | ||
| 3248 | GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 3249 | const std::string op_b = | ||
| 3250 | GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 3251 | |||
| 3252 | const std::string second_pred = | ||
| 3253 | GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 3254 | |||
| 3255 | const std::string combiner = GetPredicateCombiner(instr.pset.op); | ||
| 3256 | |||
| 3257 | const std::string predicate = | ||
| 3258 | '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')'; | ||
| 3259 | const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; | ||
| 3260 | if (instr.pset.bf == 0) { | ||
| 3261 | const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; | ||
| 3262 | regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false, | ||
| 3263 | instr.generates_cc); | ||
| 3264 | } else { | ||
| 3265 | const std::string value = '(' + result + ") ? 1.0 : 0.0"; | ||
| 3266 | regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc); | ||
| 3267 | } | ||
| 3268 | break; | ||
| 3269 | } | ||
| 3270 | case OpCode::Type::PredicateSetPredicate: { | ||
| 3271 | switch (opcode->get().GetId()) { | ||
| 3272 | case OpCode::Id::PSETP: { | ||
| 3273 | const std::string op_a = | ||
| 3274 | GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 3275 | const std::string op_b = | ||
| 3276 | GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 3277 | |||
| 3278 | // We can't use the constant predicate as destination. | ||
| 3279 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3280 | |||
| 3281 | const std::string second_pred = | ||
| 3282 | GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 3283 | |||
| 3284 | const std::string combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 3285 | |||
| 3286 | const std::string predicate = | ||
| 3287 | '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; | ||
| 3288 | |||
| 3289 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3290 | SetPredicate(instr.psetp.pred3, | ||
| 3291 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3292 | |||
| 3293 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3294 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3295 | // if enabled | ||
| 3296 | SetPredicate(instr.psetp.pred0, | ||
| 3297 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3298 | } | ||
| 3299 | break; | ||
| 3300 | } | ||
| 3301 | case OpCode::Id::CSETP: { | ||
| 3302 | const std::string pred = | ||
| 3303 | GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 3304 | const std::string combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 3305 | const std::string condition_code = regs.GetConditionCode(instr.csetp.cc); | ||
| 3306 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3307 | SetPredicate(instr.csetp.pred3, | ||
| 3308 | '(' + condition_code + ") " + combiner + " (" + pred + ')'); | ||
| 3309 | } | ||
| 3310 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3311 | SetPredicate(instr.csetp.pred0, | ||
| 3312 | "!(" + condition_code + ") " + combiner + " (" + pred + ')'); | ||
| 3313 | } | ||
| 3314 | break; | ||
| 3315 | } | ||
| 3316 | default: { | ||
| 3317 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 3318 | } | ||
| 3319 | } | ||
| 3320 | break; | ||
| 3321 | } | ||
| 3322 | case OpCode::Type::RegisterSetPredicate: { | ||
| 3323 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 3324 | 1095 | ||
| 3325 | const std::string apply_mask = [&]() { | 1096 | std::string Logical2HGreaterThan(Operation operation) { |
| 3326 | switch (opcode->get().GetId()) { | 1097 | return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, |
| 3327 | case OpCode::Id::R2P_IMM: | 1098 | Type::HalfFloat); |
| 3328 | return std::to_string(instr.r2p.immediate_mask); | 1099 | } |
| 3329 | default: | ||
| 3330 | UNREACHABLE(); | ||
| 3331 | return std::to_string(instr.r2p.immediate_mask); | ||
| 3332 | } | ||
| 3333 | }(); | ||
| 3334 | const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 3335 | " >> " + std::to_string(instr.r2p.byte) + ')'; | ||
| 3336 | 1100 | ||
| 3337 | constexpr u64 programmable_preds = 7; | 1101 | std::string Logical2HNotEqual(Operation operation) { |
| 3338 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | 1102 | return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, |
| 3339 | const auto shift = std::to_string(1 << pred); | 1103 | Type::HalfFloat); |
| 1104 | } | ||
| 3340 | 1105 | ||
| 3341 | shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {"); | 1106 | std::string Logical2HGreaterEqual(Operation operation) { |
| 3342 | ++shader.scope; | 1107 | return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, |
| 1108 | Type::HalfFloat); | ||
| 1109 | } | ||
| 3343 | 1110 | ||
| 3344 | SetPredicate(pred, '(' + mask + " & " + shift + ") != 0"); | 1111 | std::string F4Texture(Operation operation) { |
| 1112 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1113 | ASSERT(meta); | ||
| 3345 | 1114 | ||
| 3346 | --shader.scope; | 1115 | std::string expr = GenerateTexture(operation, "texture"); |
| 3347 | shader.AddLine('}'); | 1116 | if (meta->sampler.IsShadow()) { |
| 3348 | } | 1117 | expr = "vec4(" + expr + ')'; |
| 3349 | break; | ||
| 3350 | } | 1118 | } |
| 3351 | case OpCode::Type::FloatSet: { | 1119 | return expr + GetSwizzle(meta->element); |
| 3352 | const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), | 1120 | } |
| 3353 | instr.fset.abs_a != 0, instr.fset.neg_a != 0); | ||
| 3354 | |||
| 3355 | std::string op_b; | ||
| 3356 | |||
| 3357 | if (instr.is_b_imm) { | ||
| 3358 | const std::string imm = GetImmediate19(instr); | ||
| 3359 | op_b = imm; | ||
| 3360 | } else { | ||
| 3361 | if (instr.is_b_gpr) { | ||
| 3362 | op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3363 | } else { | ||
| 3364 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3365 | GLSLRegister::Type::Float); | ||
| 3366 | } | ||
| 3367 | } | ||
| 3368 | |||
| 3369 | op_b = GetOperandAbsNeg(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 3370 | |||
| 3371 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 3372 | // condition is true, and to 0 otherwise. | ||
| 3373 | const std::string second_pred = | ||
| 3374 | GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 3375 | |||
| 3376 | const std::string combiner = GetPredicateCombiner(instr.fset.op); | ||
| 3377 | 1121 | ||
| 3378 | const std::string predicate = "((" + | 1122 | std::string F4TextureLod(Operation operation) { |
| 3379 | GetPredicateComparison(instr.fset.cond, op_a, op_b) + | 1123 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 3380 | ") " + combiner + " (" + second_pred + "))"; | 1124 | ASSERT(meta); |
| 3381 | 1125 | ||
| 3382 | if (instr.fset.bf) { | 1126 | std::string expr = GenerateTexture(operation, "textureLod"); |
| 3383 | regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false, | 1127 | if (meta->sampler.IsShadow()) { |
| 3384 | instr.generates_cc); | 1128 | expr = "vec4(" + expr + ')'; |
| 3385 | } else { | ||
| 3386 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | ||
| 3387 | 1, false, instr.generates_cc); | ||
| 3388 | } | ||
| 3389 | break; | ||
| 3390 | } | 1129 | } |
| 3391 | case OpCode::Type::IntegerSet: { | 1130 | return expr + GetSwizzle(meta->element); |
| 3392 | const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed); | 1131 | } |
| 3393 | 1132 | ||
| 3394 | std::string op_b; | 1133 | std::string F4TextureGather(Operation operation) { |
| 1134 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1135 | ASSERT(meta); | ||
| 3395 | 1136 | ||
| 3396 | if (instr.is_b_imm) { | 1137 | return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + |
| 3397 | op_b = std::to_string(instr.alu.GetSignedImm20_20()); | 1138 | GetSwizzle(meta->element); |
| 3398 | } else { | 1139 | } |
| 3399 | if (instr.is_b_gpr) { | ||
| 3400 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed); | ||
| 3401 | } else { | ||
| 3402 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3403 | GLSLRegister::Type::Integer); | ||
| 3404 | } | ||
| 3405 | } | ||
| 3406 | |||
| 3407 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 3408 | // condition is true, and to 0 otherwise. | ||
| 3409 | const std::string second_pred = | ||
| 3410 | GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 3411 | 1140 | ||
| 3412 | const std::string combiner = GetPredicateCombiner(instr.iset.op); | 1141 | std::string F4TextureQueryDimensions(Operation operation) { |
| 1142 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1143 | ASSERT(meta); | ||
| 3413 | 1144 | ||
| 3414 | const std::string predicate = "((" + | 1145 | const std::string sampler = GetSampler(meta->sampler); |
| 3415 | GetPredicateComparison(instr.iset.cond, op_a, op_b) + | 1146 | const std::string lod = VisitOperand(operation, 0, Type::Int); |
| 3416 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3417 | 1147 | ||
| 3418 | if (instr.iset.bf) { | 1148 | switch (meta->element) { |
| 3419 | regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); | 1149 | case 0: |
| 3420 | } else { | 1150 | case 1: |
| 3421 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | 1151 | return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element); |
| 3422 | 1); | 1152 | case 2: |
| 3423 | } | 1153 | return "0"; |
| 3424 | break; | 1154 | case 3: |
| 1155 | return "textureQueryLevels(" + sampler + ')'; | ||
| 3425 | } | 1156 | } |
| 3426 | case OpCode::Type::HalfSet: { | 1157 | UNREACHABLE(); |
| 3427 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | 1158 | return "0"; |
| 3428 | 1159 | } | |
| 3429 | const std::string op_a = | ||
| 3430 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a, | ||
| 3431 | instr.hset2.abs_a != 0, instr.hset2.negate_a != 0); | ||
| 3432 | |||
| 3433 | const std::string op_b = [&]() { | ||
| 3434 | switch (opcode->get().GetId()) { | ||
| 3435 | case OpCode::Id::HSET2_R: | ||
| 3436 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3437 | instr.hset2.type_b, instr.hset2.abs_b != 0, | ||
| 3438 | instr.hset2.negate_b != 0); | ||
| 3439 | default: | ||
| 3440 | UNREACHABLE(); | ||
| 3441 | return std::string("vec2(0)"); | ||
| 3442 | } | ||
| 3443 | }(); | ||
| 3444 | |||
| 3445 | const std::string second_pred = | ||
| 3446 | GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0); | ||
| 3447 | |||
| 3448 | const std::string combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 3449 | |||
| 3450 | // HSET2 operates on each half float in the pack. | ||
| 3451 | std::string result; | ||
| 3452 | for (int i = 0; i < 2; ++i) { | ||
| 3453 | const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000"; | ||
| 3454 | const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000"; | ||
| 3455 | const std::string value = instr.hset2.bf == 1 ? float_value : integer_value; | ||
| 3456 | 1160 | ||
| 3457 | const std::string comp = std::string(".") + "xy"[i]; | 1161 | std::string F4TextureQueryLod(Operation operation) { |
| 3458 | const std::string predicate = | 1162 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 3459 | "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) + | 1163 | ASSERT(meta); |
| 3460 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3461 | 1164 | ||
| 3462 | result += '(' + predicate + " ? " + value + " : 0)"; | 1165 | if (meta->element < 2) { |
| 3463 | if (i == 0) { | 1166 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + |
| 3464 | result += " | "; | 1167 | GetSwizzle(meta->element) + "))"; |
| 3465 | } | ||
| 3466 | } | ||
| 3467 | regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1); | ||
| 3468 | break; | ||
| 3469 | } | 1168 | } |
| 3470 | case OpCode::Type::Xmad: { | 1169 | return "0"; |
| 3471 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | 1170 | } |
| 3472 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 3473 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3474 | "Condition codes generation in XMAD is partially implemented"); | ||
| 3475 | |||
| 3476 | std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; | ||
| 3477 | std::string op_b; | ||
| 3478 | std::string op_c; | ||
| 3479 | |||
| 3480 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 3481 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 3482 | const bool is_signed{instr.xmad.sign_a == 1}; | ||
| 3483 | |||
| 3484 | bool is_merge{}; | ||
| 3485 | switch (opcode->get().GetId()) { | ||
| 3486 | case OpCode::Id::XMAD_CR: { | ||
| 3487 | is_merge = instr.xmad.merge_56; | ||
| 3488 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3489 | instr.xmad.sign_b ? GLSLRegister::Type::Integer | ||
| 3490 | : GLSLRegister::Type::UnsignedInteger); | ||
| 3491 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3492 | break; | ||
| 3493 | } | ||
| 3494 | case OpCode::Id::XMAD_RR: { | ||
| 3495 | is_merge = instr.xmad.merge_37; | ||
| 3496 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); | ||
| 3497 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3498 | break; | ||
| 3499 | } | ||
| 3500 | case OpCode::Id::XMAD_RC: { | ||
| 3501 | op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); | ||
| 3502 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3503 | is_signed ? GLSLRegister::Type::Integer | ||
| 3504 | : GLSLRegister::Type::UnsignedInteger); | ||
| 3505 | break; | ||
| 3506 | } | ||
| 3507 | case OpCode::Id::XMAD_IMM: { | ||
| 3508 | is_merge = instr.xmad.merge_37; | ||
| 3509 | op_b += std::to_string(instr.xmad.imm20_16); | ||
| 3510 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3511 | break; | ||
| 3512 | } | ||
| 3513 | default: { | ||
| 3514 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 3515 | } | ||
| 3516 | } | ||
| 3517 | 1171 | ||
| 3518 | // TODO(bunnei): Ensure this is right with signed operands | 1172 | std::string F4TexelFetch(Operation operation) { |
| 3519 | if (instr.xmad.high_a) { | 1173 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; |
| 3520 | op_a = "((" + op_a + ") >> 16)"; | 1174 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 3521 | } else { | 1175 | const auto count = static_cast<u32>(operation.GetOperandsCount()); |
| 3522 | op_a = "((" + op_a + ") & 0xFFFF)"; | 1176 | ASSERT(meta); |
| 3523 | } | ||
| 3524 | 1177 | ||
| 3525 | std::string src2 = '(' + op_b + ')'; // Preserve original source 2 | 1178 | std::string expr = "texelFetch("; |
| 3526 | if (instr.xmad.high_b) { | 1179 | expr += GetSampler(meta->sampler); |
| 3527 | op_b = '(' + src2 + " >> 16)"; | 1180 | expr += ", "; |
| 3528 | } else { | ||
| 3529 | op_b = '(' + src2 + " & 0xFFFF)"; | ||
| 3530 | } | ||
| 3531 | 1181 | ||
| 3532 | std::string product = '(' + op_a + " * " + op_b + ')'; | 1182 | expr += constructors[meta->coords_count - 1]; |
| 3533 | if (instr.xmad.product_shift_left) { | 1183 | expr += '('; |
| 3534 | product = '(' + product + " << 16)"; | 1184 | for (u32 i = 0; i < count; ++i) { |
| 3535 | } | 1185 | expr += VisitOperand(operation, i, Type::Int); |
| 3536 | 1186 | ||
| 3537 | switch (instr.xmad.mode) { | 1187 | if (i + 1 == meta->coords_count) { |
| 3538 | case Tegra::Shader::XmadMode::None: | 1188 | expr += ')'; |
| 3539 | break; | ||
| 3540 | case Tegra::Shader::XmadMode::CLo: | ||
| 3541 | op_c = "((" + op_c + ") & 0xFFFF)"; | ||
| 3542 | break; | ||
| 3543 | case Tegra::Shader::XmadMode::CHi: | ||
| 3544 | op_c = "((" + op_c + ") >> 16)"; | ||
| 3545 | break; | ||
| 3546 | case Tegra::Shader::XmadMode::CBcc: | ||
| 3547 | op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; | ||
| 3548 | break; | ||
| 3549 | default: { | ||
| 3550 | UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", | ||
| 3551 | static_cast<u32>(instr.xmad.mode.Value())); | ||
| 3552 | } | ||
| 3553 | } | 1189 | } |
| 3554 | 1190 | if (i + 1 < count) { | |
| 3555 | std::string sum{'(' + product + " + " + op_c + ')'}; | 1191 | expr += ", "; |
| 3556 | if (is_merge) { | ||
| 3557 | sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; | ||
| 3558 | } | 1192 | } |
| 3559 | |||
| 3560 | regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false, | ||
| 3561 | instr.generates_cc); | ||
| 3562 | break; | ||
| 3563 | } | 1193 | } |
| 3564 | default: { | 1194 | expr += ')'; |
| 3565 | switch (opcode->get().GetId()) { | 1195 | return expr + GetSwizzle(meta->element); |
| 3566 | case OpCode::Id::EXIT: { | 1196 | } |
| 3567 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3568 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3569 | "EXIT condition code used: {}", static_cast<u32>(cc)); | ||
| 3570 | |||
| 3571 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 3572 | EmitFragmentOutputsWrite(); | ||
| 3573 | } | ||
| 3574 | |||
| 3575 | switch (instr.flow.cond) { | ||
| 3576 | case Tegra::Shader::FlowCondition::Always: | ||
| 3577 | shader.AddLine("return true;"); | ||
| 3578 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3579 | // If this is an unconditional exit then just end processing here, | ||
| 3580 | // otherwise we have to account for the possibility of the condition | ||
| 3581 | // not being met, so continue processing the next instruction. | ||
| 3582 | offset = PROGRAM_END - 1; | ||
| 3583 | } | ||
| 3584 | break; | ||
| 3585 | |||
| 3586 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 3587 | // TODO(bunnei): What is this used for? If we assume this conditon is not | ||
| 3588 | // satisifed, dual vertex shaders in Farming Simulator make more sense | ||
| 3589 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 3590 | break; | ||
| 3591 | |||
| 3592 | default: | ||
| 3593 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", | ||
| 3594 | static_cast<u32>(instr.flow.cond.Value())); | ||
| 3595 | } | ||
| 3596 | break; | ||
| 3597 | } | ||
| 3598 | case OpCode::Id::KIL: { | ||
| 3599 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 3600 | 1197 | ||
| 3601 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 1198 | std::string Branch(Operation operation) { |
| 3602 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | 1199 | const auto target = std::get_if<ImmediateNode>(operation[0]); |
| 3603 | "KIL condition code used: {}", static_cast<u32>(cc)); | 1200 | UNIMPLEMENTED_IF(!target); |
| 3604 | 1201 | ||
| 3605 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain | 1202 | code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue())); |
| 3606 | // about unexecuted instructions that may follow this. | 1203 | code.AddLine("break;"); |
| 3607 | shader.AddLine("if (true) {"); | 1204 | return {}; |
| 3608 | ++shader.scope; | 1205 | } |
| 3609 | shader.AddLine("discard;"); | ||
| 3610 | --shader.scope; | ||
| 3611 | shader.AddLine("}"); | ||
| 3612 | 1206 | ||
| 3613 | break; | 1207 | std::string PushFlowStack(Operation operation) { |
| 3614 | } | 1208 | const auto target = std::get_if<ImmediateNode>(operation[0]); |
| 3615 | case OpCode::Id::OUT_R: { | 1209 | UNIMPLEMENTED_IF(!target); |
| 3616 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 3617 | "Stream buffer is not supported"); | ||
| 3618 | ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 3619 | "OUT is expected to be used in a geometry shader."); | ||
| 3620 | |||
| 3621 | if (instr.out.emit) { | ||
| 3622 | // gpr0 is used to store the next address. Hardware returns a pointer but | ||
| 3623 | // we just return the next index with a cyclic cap. | ||
| 3624 | const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)}; | ||
| 3625 | const std::string next = "((" + current + " + 1" + ") % " + | ||
| 3626 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'; | ||
| 3627 | shader.AddLine("emit_vertex(" + current + ");"); | ||
| 3628 | regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1); | ||
| 3629 | } | ||
| 3630 | if (instr.out.cut) { | ||
| 3631 | shader.AddLine("EndPrimitive();"); | ||
| 3632 | } | ||
| 3633 | 1210 | ||
| 3634 | break; | 1211 | code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue())); |
| 3635 | } | 1212 | return {}; |
| 3636 | case OpCode::Id::MOV_SYS: { | 1213 | } |
| 3637 | switch (instr.sys20) { | ||
| 3638 | case Tegra::Shader::SystemVariable::InvocationInfo: { | ||
| 3639 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | ||
| 3640 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); | ||
| 3641 | break; | ||
| 3642 | } | ||
| 3643 | case Tegra::Shader::SystemVariable::Ydirection: { | ||
| 3644 | // Config pack's third value is Y_NEGATE's state. | ||
| 3645 | regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1); | ||
| 3646 | break; | ||
| 3647 | } | ||
| 3648 | default: { | ||
| 3649 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | ||
| 3650 | static_cast<u32>(instr.sys20.Value())); | ||
| 3651 | } | ||
| 3652 | } | ||
| 3653 | break; | ||
| 3654 | } | ||
| 3655 | case OpCode::Id::ISBERD: { | ||
| 3656 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 3657 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 3658 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 3659 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 3660 | ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 3661 | "ISBERD is expected to be used in a geometry shader."); | ||
| 3662 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 3663 | regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1); | ||
| 3664 | break; | ||
| 3665 | } | ||
| 3666 | case OpCode::Id::BRA: { | ||
| 3667 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3668 | "BRA with constant buffers are not implemented"); | ||
| 3669 | |||
| 3670 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3671 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3672 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 3673 | const std::string condition_code = regs.GetConditionCode(cc); | ||
| 3674 | shader.AddLine("if (" + condition_code + "){"); | ||
| 3675 | shader.scope++; | ||
| 3676 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3677 | shader.scope--; | ||
| 3678 | shader.AddLine('}'); | ||
| 3679 | } else { | ||
| 3680 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3681 | } | ||
| 3682 | break; | ||
| 3683 | } | ||
| 3684 | case OpCode::Id::IPA: { | ||
| 3685 | const auto& attribute = instr.attribute.fmt28; | ||
| 3686 | const auto& reg = instr.gpr0; | ||
| 3687 | |||
| 3688 | Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), | ||
| 3689 | instr.ipa.sample_mode.Value()}; | ||
| 3690 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index, | ||
| 3691 | input_mode); | ||
| 3692 | 1214 | ||
| 3693 | if (instr.ipa.saturate) { | 1215 | std::string PopFlowStack(Operation operation) { |
| 3694 | regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true); | 1216 | code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); |
| 3695 | } | 1217 | code.AddLine("break;"); |
| 3696 | break; | 1218 | return {}; |
| 3697 | } | 1219 | } |
| 3698 | case OpCode::Id::SSY: { | ||
| 3699 | // The SSY opcode tells the GPU where to re-converge divergent execution paths, it | ||
| 3700 | // sets the target of the jump that the SYNC instruction will make. The SSY opcode | ||
| 3701 | // has a similar structure to the BRA opcode. | ||
| 3702 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3703 | "Constant buffer flow is not supported"); | ||
| 3704 | |||
| 3705 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3706 | EmitPushToFlowStack(target); | ||
| 3707 | break; | ||
| 3708 | } | ||
| 3709 | case OpCode::Id::PBK: { | ||
| 3710 | // PBK pushes to a stack the address where BRK will jump to. This shares stack with | ||
| 3711 | // SSY but using SYNC on a PBK address will kill the shader execution. We don't | ||
| 3712 | // emulate this because it's very unlikely a driver will emit such invalid shader. | ||
| 3713 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3714 | "Constant buffer PBK is not supported"); | ||
| 3715 | |||
| 3716 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3717 | EmitPushToFlowStack(target); | ||
| 3718 | break; | ||
| 3719 | } | ||
| 3720 | case OpCode::Id::SYNC: { | ||
| 3721 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3722 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3723 | "SYNC condition code used: {}", static_cast<u32>(cc)); | ||
| 3724 | 1220 | ||
| 3725 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 1221 | std::string Exit(Operation operation) { |
| 3726 | EmitPopFromFlowStack(); | 1222 | if (stage != ShaderStage::Fragment) { |
| 3727 | break; | 1223 | code.AddLine("return;"); |
| 1224 | return {}; | ||
| 1225 | } | ||
| 1226 | const auto& used_registers = ir.GetRegisters(); | ||
| 1227 | const auto SafeGetRegister = [&](u32 reg) -> std::string { | ||
| 1228 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1229 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1230 | return GetRegister(reg); | ||
| 3728 | } | 1231 | } |
| 3729 | case OpCode::Id::BRK: { | 1232 | return "0.0f"; |
| 3730 | // The BRK opcode jumps to the address previously set by the PBK opcode | 1233 | }; |
| 3731 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3732 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3733 | "BRK condition code used: {}", static_cast<u32>(cc)); | ||
| 3734 | 1234 | ||
| 3735 | EmitPopFromFlowStack(); | 1235 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); |
| 3736 | break; | ||
| 3737 | } | ||
| 3738 | case OpCode::Id::DEPBAR: { | ||
| 3739 | // TODO(Subv): Find out if we actually have to care about this instruction or if | ||
| 3740 | // the GLSL compiler takes care of that for us. | ||
| 3741 | LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 3742 | break; | ||
| 3743 | } | ||
| 3744 | case OpCode::Id::VMAD: { | ||
| 3745 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3746 | "Condition codes generation in VMAD is not implemented"); | ||
| 3747 | |||
| 3748 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 3749 | const std::string op_a = GetVideoOperandA(instr); | ||
| 3750 | const std::string op_b = GetVideoOperandB(instr); | ||
| 3751 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); | ||
| 3752 | |||
| 3753 | std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 3754 | |||
| 3755 | switch (instr.vmad.shr) { | ||
| 3756 | case Tegra::Shader::VmadShr::Shr7: | ||
| 3757 | result = '(' + result + " >> 7)"; | ||
| 3758 | break; | ||
| 3759 | case Tegra::Shader::VmadShr::Shr15: | ||
| 3760 | result = '(' + result + " >> 15)"; | ||
| 3761 | break; | ||
| 3762 | } | ||
| 3763 | 1236 | ||
| 3764 | regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, | 1237 | code.AddLine("if (alpha_test[0] != 0) {"); |
| 3765 | instr.vmad.saturate, instr.vmad.cc); | 1238 | ++code.scope; |
| 3766 | break; | 1239 | // We start on the register containing the alpha value in the first RT. |
| 1240 | u32 current_reg = 3; | ||
| 1241 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { | ||
| 1242 | // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when | ||
| 1243 | // multiple render targets are used. | ||
| 1244 | if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || | ||
| 1245 | header.ps.IsColorComponentOutputEnabled(render_target, 1) || | ||
| 1246 | header.ps.IsColorComponentOutputEnabled(render_target, 2) || | ||
| 1247 | header.ps.IsColorComponentOutputEnabled(render_target, 3)) { | ||
| 1248 | code.AddLine( | ||
| 1249 | fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg))); | ||
| 1250 | current_reg += 4; | ||
| 3767 | } | 1251 | } |
| 3768 | case OpCode::Id::VSETP: { | 1252 | } |
| 3769 | const std::string op_a = GetVideoOperandA(instr); | 1253 | --code.scope; |
| 3770 | const std::string op_b = GetVideoOperandB(instr); | 1254 | code.AddLine('}'); |
| 3771 | |||
| 3772 | // We can't use the constant predicate as destination. | ||
| 3773 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3774 | |||
| 3775 | const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false); | ||
| 3776 | |||
| 3777 | const std::string combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 3778 | |||
| 3779 | const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b); | ||
| 3780 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3781 | SetPredicate(instr.vsetp.pred3, | ||
| 3782 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3783 | 1255 | ||
| 3784 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1256 | // Write the color outputs using the data in the shader registers, disabled |
| 3785 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1257 | // rendertargets/components are skipped in the register assignment. |
| 3786 | // if enabled | 1258 | current_reg = 0; |
| 3787 | SetPredicate(instr.vsetp.pred0, | 1259 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { |
| 3788 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | 1260 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1261 | for (u32 component = 0; component < 4; ++component) { | ||
| 1262 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 1263 | code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | ||
| 1264 | SafeGetRegister(current_reg))); | ||
| 1265 | ++current_reg; | ||
| 3789 | } | 1266 | } |
| 3790 | break; | ||
| 3791 | } | ||
| 3792 | default: { | ||
| 3793 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 3794 | break; | ||
| 3795 | } | ||
| 3796 | } | 1267 | } |
| 3797 | |||
| 3798 | break; | ||
| 3799 | } | ||
| 3800 | } | 1268 | } |
| 3801 | 1269 | ||
| 3802 | // Close the predicate condition scope. | 1270 | if (header.ps.omap.depth) { |
| 3803 | if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { | 1271 | // The depth output is always 2 registers after the last color output, and current_reg |
| 3804 | --shader.scope; | 1272 | // already contains one past the last color register. |
| 3805 | shader.AddLine('}'); | 1273 | code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';'); |
| 3806 | } | 1274 | } |
| 3807 | 1275 | ||
| 3808 | return offset + 1; | 1276 | code.AddLine("return;"); |
| 1277 | return {}; | ||
| 3809 | } | 1278 | } |
| 3810 | 1279 | ||
| 3811 | /** | 1280 | std::string Discard(Operation operation) { |
| 3812 | * Compiles a range of instructions from Tegra to GLSL. | 1281 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain |
| 3813 | * @param begin the offset of the starting instruction. | 1282 | // about unexecuted instructions that may follow this. |
| 3814 | * @param end the offset where the compilation should stop (exclusive). | 1283 | code.AddLine("if (true) {"); |
| 3815 | * @return the offset of the next instruction to compile. PROGRAM_END if the program | 1284 | ++code.scope; |
| 3816 | * terminates. | 1285 | code.AddLine("discard;"); |
| 3817 | */ | 1286 | --code.scope; |
| 3818 | u32 CompileRange(u32 begin, u32 end) { | 1287 | code.AddLine("}"); |
| 3819 | u32 program_counter; | 1288 | return {}; |
| 3820 | for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { | ||
| 3821 | program_counter = CompileInstr(program_counter); | ||
| 3822 | } | ||
| 3823 | return program_counter; | ||
| 3824 | } | 1289 | } |
| 3825 | 1290 | ||
| 3826 | void Generate(const std::string& suffix) { | 1291 | std::string EmitVertex(Operation operation) { |
| 3827 | // Add declarations for all subroutines | 1292 | ASSERT_MSG(stage == ShaderStage::Geometry, |
| 3828 | for (const auto& subroutine : subroutines) { | 1293 | "EmitVertex is expected to be used in a geometry shader."); |
| 3829 | shader.AddLine("bool " + subroutine.GetName() + "();"); | ||
| 3830 | } | ||
| 3831 | shader.AddNewLine(); | ||
| 3832 | |||
| 3833 | // Add the main entry point | ||
| 3834 | shader.AddLine("bool exec_" + suffix + "() {"); | ||
| 3835 | ++shader.scope; | ||
| 3836 | CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); | ||
| 3837 | --shader.scope; | ||
| 3838 | shader.AddLine("}\n"); | ||
| 3839 | |||
| 3840 | // Add definitions for all subroutines | ||
| 3841 | for (const auto& subroutine : subroutines) { | ||
| 3842 | std::set<u32> labels = subroutine.labels; | ||
| 3843 | |||
| 3844 | shader.AddLine("bool " + subroutine.GetName() + "() {"); | ||
| 3845 | ++shader.scope; | ||
| 3846 | |||
| 3847 | if (labels.empty()) { | ||
| 3848 | if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { | ||
| 3849 | shader.AddLine("return false;"); | ||
| 3850 | } | ||
| 3851 | } else { | ||
| 3852 | labels.insert(subroutine.begin); | ||
| 3853 | shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); | ||
| 3854 | |||
| 3855 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | ||
| 3856 | // unlikely that shaders will use 20 nested SSYs and PBKs. | ||
| 3857 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 3858 | shader.AddLine("uint flow_stack[" + std::to_string(FLOW_STACK_SIZE) + "];"); | ||
| 3859 | shader.AddLine("uint flow_stack_top = 0u;"); | ||
| 3860 | 1294 | ||
| 3861 | shader.AddLine("while (true) {"); | 1295 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| 3862 | ++shader.scope; | 1296 | // fragment). For more info about flipping, refer to gl_shader_gen.cpp. |
| 3863 | 1297 | code.AddLine("position.xy *= viewport_flip.xy;"); | |
| 3864 | shader.AddLine("switch (jmp_to) {"); | 1298 | code.AddLine("gl_Position = position;"); |
| 3865 | 1299 | code.AddLine("position.w = 1.0;"); | |
| 3866 | for (auto label : labels) { | 1300 | code.AddLine("EmitVertex();"); |
| 3867 | shader.AddLine("case " + std::to_string(label) + "u: {"); | 1301 | return {}; |
| 3868 | ++shader.scope; | 1302 | } |
| 1303 | |||
| 1304 | std::string EndPrimitive(Operation operation) { | ||
| 1305 | ASSERT_MSG(stage == ShaderStage::Geometry, | ||
| 1306 | "EndPrimitive is expected to be used in a geometry shader."); | ||
| 1307 | |||
| 1308 | code.AddLine("EndPrimitive();"); | ||
| 1309 | return {}; | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | std::string YNegate(Operation operation) { | ||
| 1313 | // Config pack's third value is Y_NEGATE's state. | ||
| 1314 | return "uintBitsToFloat(config_pack[2])"; | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | static constexpr OperationDecompilersArray operation_decompilers = { | ||
| 1318 | &GLSLDecompiler::Assign, | ||
| 1319 | |||
| 1320 | &GLSLDecompiler::Select, | ||
| 1321 | |||
| 1322 | &GLSLDecompiler::Add<Type::Float>, | ||
| 1323 | &GLSLDecompiler::Mul<Type::Float>, | ||
| 1324 | &GLSLDecompiler::Div<Type::Float>, | ||
| 1325 | &GLSLDecompiler::Fma<Type::Float>, | ||
| 1326 | &GLSLDecompiler::Negate<Type::Float>, | ||
| 1327 | &GLSLDecompiler::Absolute<Type::Float>, | ||
| 1328 | &GLSLDecompiler::FClamp, | ||
| 1329 | &GLSLDecompiler::Min<Type::Float>, | ||
| 1330 | &GLSLDecompiler::Max<Type::Float>, | ||
| 1331 | &GLSLDecompiler::FCos, | ||
| 1332 | &GLSLDecompiler::FSin, | ||
| 1333 | &GLSLDecompiler::FExp2, | ||
| 1334 | &GLSLDecompiler::FLog2, | ||
| 1335 | &GLSLDecompiler::FInverseSqrt, | ||
| 1336 | &GLSLDecompiler::FSqrt, | ||
| 1337 | &GLSLDecompiler::FRoundEven, | ||
| 1338 | &GLSLDecompiler::FFloor, | ||
| 1339 | &GLSLDecompiler::FCeil, | ||
| 1340 | &GLSLDecompiler::FTrunc, | ||
| 1341 | &GLSLDecompiler::FCastInteger<Type::Int>, | ||
| 1342 | &GLSLDecompiler::FCastInteger<Type::Uint>, | ||
| 1343 | |||
| 1344 | &GLSLDecompiler::Add<Type::Int>, | ||
| 1345 | &GLSLDecompiler::Mul<Type::Int>, | ||
| 1346 | &GLSLDecompiler::Div<Type::Int>, | ||
| 1347 | &GLSLDecompiler::Negate<Type::Int>, | ||
| 1348 | &GLSLDecompiler::Absolute<Type::Int>, | ||
| 1349 | &GLSLDecompiler::Min<Type::Int>, | ||
| 1350 | &GLSLDecompiler::Max<Type::Int>, | ||
| 1351 | |||
| 1352 | &GLSLDecompiler::ICastFloat, | ||
| 1353 | &GLSLDecompiler::ICastUnsigned, | ||
| 1354 | &GLSLDecompiler::LogicalShiftLeft<Type::Int>, | ||
| 1355 | &GLSLDecompiler::ILogicalShiftRight, | ||
| 1356 | &GLSLDecompiler::IArithmeticShiftRight, | ||
| 1357 | &GLSLDecompiler::BitwiseAnd<Type::Int>, | ||
| 1358 | &GLSLDecompiler::BitwiseOr<Type::Int>, | ||
| 1359 | &GLSLDecompiler::BitwiseXor<Type::Int>, | ||
| 1360 | &GLSLDecompiler::BitwiseNot<Type::Int>, | ||
| 1361 | &GLSLDecompiler::BitfieldInsert<Type::Int>, | ||
| 1362 | &GLSLDecompiler::BitfieldExtract<Type::Int>, | ||
| 1363 | &GLSLDecompiler::BitCount<Type::Int>, | ||
| 1364 | |||
| 1365 | &GLSLDecompiler::Add<Type::Uint>, | ||
| 1366 | &GLSLDecompiler::Mul<Type::Uint>, | ||
| 1367 | &GLSLDecompiler::Div<Type::Uint>, | ||
| 1368 | &GLSLDecompiler::Min<Type::Uint>, | ||
| 1369 | &GLSLDecompiler::Max<Type::Uint>, | ||
| 1370 | &GLSLDecompiler::UCastFloat, | ||
| 1371 | &GLSLDecompiler::UCastSigned, | ||
| 1372 | &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, | ||
| 1373 | &GLSLDecompiler::UShiftRight, | ||
| 1374 | &GLSLDecompiler::UShiftRight, | ||
| 1375 | &GLSLDecompiler::BitwiseAnd<Type::Uint>, | ||
| 1376 | &GLSLDecompiler::BitwiseOr<Type::Uint>, | ||
| 1377 | &GLSLDecompiler::BitwiseXor<Type::Uint>, | ||
| 1378 | &GLSLDecompiler::BitwiseNot<Type::Uint>, | ||
| 1379 | &GLSLDecompiler::BitfieldInsert<Type::Uint>, | ||
| 1380 | &GLSLDecompiler::BitfieldExtract<Type::Uint>, | ||
| 1381 | &GLSLDecompiler::BitCount<Type::Uint>, | ||
| 1382 | |||
| 1383 | &GLSLDecompiler::Add<Type::HalfFloat>, | ||
| 1384 | &GLSLDecompiler::Mul<Type::HalfFloat>, | ||
| 1385 | &GLSLDecompiler::Fma<Type::HalfFloat>, | ||
| 1386 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | ||
| 1387 | &GLSLDecompiler::HNegate, | ||
| 1388 | &GLSLDecompiler::HMergeF32, | ||
| 1389 | &GLSLDecompiler::HMergeH0, | ||
| 1390 | &GLSLDecompiler::HMergeH1, | ||
| 1391 | &GLSLDecompiler::HPack2, | ||
| 1392 | |||
| 1393 | &GLSLDecompiler::LogicalAssign, | ||
| 1394 | &GLSLDecompiler::LogicalAnd, | ||
| 1395 | &GLSLDecompiler::LogicalOr, | ||
| 1396 | &GLSLDecompiler::LogicalXor, | ||
| 1397 | &GLSLDecompiler::LogicalNegate, | ||
| 1398 | &GLSLDecompiler::LogicalPick2, | ||
| 1399 | &GLSLDecompiler::LogicalAll2, | ||
| 1400 | &GLSLDecompiler::LogicalAny2, | ||
| 1401 | |||
| 1402 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | ||
| 1403 | &GLSLDecompiler::LogicalEqual<Type::Float>, | ||
| 1404 | &GLSLDecompiler::LogicalLessEqual<Type::Float>, | ||
| 1405 | &GLSLDecompiler::LogicalGreaterThan<Type::Float>, | ||
| 1406 | &GLSLDecompiler::LogicalNotEqual<Type::Float>, | ||
| 1407 | &GLSLDecompiler::LogicalGreaterEqual<Type::Float>, | ||
| 1408 | &GLSLDecompiler::LogicalFIsNan, | ||
| 1409 | |||
| 1410 | &GLSLDecompiler::LogicalLessThan<Type::Int>, | ||
| 1411 | &GLSLDecompiler::LogicalEqual<Type::Int>, | ||
| 1412 | &GLSLDecompiler::LogicalLessEqual<Type::Int>, | ||
| 1413 | &GLSLDecompiler::LogicalGreaterThan<Type::Int>, | ||
| 1414 | &GLSLDecompiler::LogicalNotEqual<Type::Int>, | ||
| 1415 | &GLSLDecompiler::LogicalGreaterEqual<Type::Int>, | ||
| 1416 | |||
| 1417 | &GLSLDecompiler::LogicalLessThan<Type::Uint>, | ||
| 1418 | &GLSLDecompiler::LogicalEqual<Type::Uint>, | ||
| 1419 | &GLSLDecompiler::LogicalLessEqual<Type::Uint>, | ||
| 1420 | &GLSLDecompiler::LogicalGreaterThan<Type::Uint>, | ||
| 1421 | &GLSLDecompiler::LogicalNotEqual<Type::Uint>, | ||
| 1422 | &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, | ||
| 1423 | |||
| 1424 | &GLSLDecompiler::Logical2HLessThan, | ||
| 1425 | &GLSLDecompiler::Logical2HEqual, | ||
| 1426 | &GLSLDecompiler::Logical2HLessEqual, | ||
| 1427 | &GLSLDecompiler::Logical2HGreaterThan, | ||
| 1428 | &GLSLDecompiler::Logical2HNotEqual, | ||
| 1429 | &GLSLDecompiler::Logical2HGreaterEqual, | ||
| 1430 | |||
| 1431 | &GLSLDecompiler::F4Texture, | ||
| 1432 | &GLSLDecompiler::F4TextureLod, | ||
| 1433 | &GLSLDecompiler::F4TextureGather, | ||
| 1434 | &GLSLDecompiler::F4TextureQueryDimensions, | ||
| 1435 | &GLSLDecompiler::F4TextureQueryLod, | ||
| 1436 | &GLSLDecompiler::F4TexelFetch, | ||
| 1437 | |||
| 1438 | &GLSLDecompiler::Branch, | ||
| 1439 | &GLSLDecompiler::PushFlowStack, | ||
| 1440 | &GLSLDecompiler::PopFlowStack, | ||
| 1441 | &GLSLDecompiler::Exit, | ||
| 1442 | &GLSLDecompiler::Discard, | ||
| 1443 | |||
| 1444 | &GLSLDecompiler::EmitVertex, | ||
| 1445 | &GLSLDecompiler::EndPrimitive, | ||
| 1446 | |||
| 1447 | &GLSLDecompiler::YNegate, | ||
| 1448 | }; | ||
| 3869 | 1449 | ||
| 3870 | const auto next_it = labels.lower_bound(label + 1); | 1450 | std::string GetRegister(u32 index) const { |
| 3871 | const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; | 1451 | return GetDeclarationWithSuffix(index, "gpr"); |
| 1452 | } | ||
| 3872 | 1453 | ||
| 3873 | const u32 compile_end = CompileRange(label, next_label); | 1454 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 3874 | if (compile_end > next_label && compile_end != PROGRAM_END) { | 1455 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); |
| 3875 | // This happens only when there is a label inside a IF/LOOP block | 1456 | } |
| 3876 | shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); | ||
| 3877 | labels.emplace(compile_end); | ||
| 3878 | } | ||
| 3879 | 1457 | ||
| 3880 | --shader.scope; | 1458 | std::string GetInputAttribute(Attribute::Index attribute) const { |
| 3881 | shader.AddLine('}'); | 1459 | const auto index{static_cast<u32>(attribute) - |
| 3882 | } | 1460 | static_cast<u32>(Attribute::Index::Attribute_0)}; |
| 1461 | return GetDeclarationWithSuffix(index, "input_attr"); | ||
| 1462 | } | ||
| 3883 | 1463 | ||
| 3884 | shader.AddLine("default: return false;"); | 1464 | std::string GetOutputAttribute(Attribute::Index attribute) const { |
| 3885 | shader.AddLine('}'); | 1465 | const auto index{static_cast<u32>(attribute) - |
| 1466 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 1467 | return GetDeclarationWithSuffix(index, "output_attr"); | ||
| 1468 | } | ||
| 3886 | 1469 | ||
| 3887 | --shader.scope; | 1470 | std::string GetConstBuffer(u32 index) const { |
| 3888 | shader.AddLine('}'); | 1471 | return GetDeclarationWithSuffix(index, "cbuf"); |
| 1472 | } | ||
| 3889 | 1473 | ||
| 3890 | shader.AddLine("return false;"); | 1474 | std::string GetConstBufferBlock(u32 index) const { |
| 3891 | } | 1475 | return GetDeclarationWithSuffix(index, "cbuf_block"); |
| 1476 | } | ||
| 3892 | 1477 | ||
| 3893 | --shader.scope; | 1478 | std::string GetLocalMemory() const { |
| 3894 | shader.AddLine("}\n"); | 1479 | return "lmem_" + suffix; |
| 1480 | } | ||
| 3895 | 1481 | ||
| 3896 | DEBUG_ASSERT(shader.scope == 0); | 1482 | std::string GetInternalFlag(InternalFlag flag) const { |
| 3897 | } | 1483 | constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag", |
| 1484 | "carry_flag", "overflow_flag"}; | ||
| 1485 | const auto index = static_cast<u32>(flag); | ||
| 1486 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | ||
| 3898 | 1487 | ||
| 3899 | GenerateDeclarations(); | 1488 | return std::string(InternalFlagNames[index]) + '_' + suffix; |
| 3900 | } | 1489 | } |
| 3901 | 1490 | ||
| 3902 | /// Add declarations for registers | 1491 | std::string GetSampler(const Sampler& sampler) const { |
| 3903 | void GenerateDeclarations() { | 1492 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 3904 | regs.GenerateDeclarations(suffix); | 1493 | } |
| 3905 | 1494 | ||
| 3906 | for (const auto& pred : declr_predicates) { | 1495 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { |
| 3907 | declarations.AddLine("bool " + pred + " = false;"); | 1496 | return name + '_' + std::to_string(index) + '_' + suffix; |
| 3908 | } | ||
| 3909 | declarations.AddNewLine(); | ||
| 3910 | } | 1497 | } |
| 3911 | 1498 | ||
| 3912 | private: | 1499 | const ShaderIR& ir; |
| 3913 | const std::set<Subroutine>& subroutines; | 1500 | const ShaderStage stage; |
| 3914 | const ProgramCode& program_code; | 1501 | const std::string suffix; |
| 3915 | Tegra::Shader::Header header; | 1502 | const Header header; |
| 3916 | const u32 main_offset; | 1503 | |
| 3917 | Maxwell3D::Regs::ShaderStage stage; | 1504 | ShaderWriter code; |
| 3918 | const std::string& suffix; | 1505 | }; |
| 3919 | u64 local_memory_size; | ||
| 3920 | std::size_t shader_length; | ||
| 3921 | |||
| 3922 | ShaderWriter shader; | ||
| 3923 | ShaderWriter declarations; | ||
| 3924 | GLSLRegisterManager regs{shader, declarations, stage, suffix, header}; | ||
| 3925 | |||
| 3926 | // Declarations | ||
| 3927 | std::set<std::string> declr_predicates; | ||
| 3928 | }; // namespace OpenGL::GLShader::Decompiler | ||
| 3929 | 1506 | ||
| 3930 | std::string GetCommonDeclarations() { | 1507 | std::string GetCommonDeclarations() { |
| 3931 | return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n", | 1508 | return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) + |
| 3932 | RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4)); | 1509 | "\n" |
| 1510 | "#define ftoi floatBitsToInt\n" | ||
| 1511 | "#define ftou floatBitsToUint\n" | ||
| 1512 | "#define itof intBitsToFloat\n" | ||
| 1513 | "#define utof uintBitsToFloat\n\n" | ||
| 1514 | "float fromHalf2(vec2 pair) {\n" | ||
| 1515 | " return utof(packHalf2x16(pair));\n" | ||
| 1516 | "}\n\n" | ||
| 1517 | "vec2 toHalf2(float value) {\n" | ||
| 1518 | " return unpackHalf2x16(ftou(value));\n" | ||
| 1519 | "}\n"; | ||
| 3933 | } | 1520 | } |
| 3934 | 1521 | ||
| 3935 | std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | 1522 | ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) { |
| 3936 | Maxwell3D::Regs::ShaderStage stage, | 1523 | GLSLDecompiler decompiler(ir, stage, suffix); |
| 3937 | const std::string& suffix) { | 1524 | decompiler.Decompile(); |
| 3938 | try { | 1525 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; |
| 3939 | ControlFlowAnalyzer analyzer(program_code, main_offset, suffix); | ||
| 3940 | const auto subroutines = analyzer.GetSubroutines(); | ||
| 3941 | GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix, | ||
| 3942 | analyzer.GetShaderLength()); | ||
| 3943 | return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; | ||
| 3944 | } catch (const DecompileFail& exception) { | ||
| 3945 | LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); | ||
| 3946 | } | ||
| 3947 | return {}; | ||
| 3948 | } | 1526 | } |
| 3949 | 1527 | ||
| 3950 | } // namespace OpenGL::GLShader::Decompiler | 1528 | } // namespace OpenGL::GLShader \ No newline at end of file |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index d01a4a7ee..396a560d8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -5,21 +5,84 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <functional> | ||
| 9 | #include <optional> | ||
| 10 | #include <string> | 8 | #include <string> |
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL::GLShader::Decompiler { | 15 | namespace VideoCommon::Shader { |
| 16 | class ShaderIR; | ||
| 17 | } | ||
| 16 | 18 | ||
| 17 | using Tegra::Engines::Maxwell3D; | 19 | namespace OpenGL::GLShader { |
| 20 | |||
| 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 22 | |||
| 23 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | ||
| 24 | public: | ||
| 25 | explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, | ||
| 26 | Maxwell::ShaderStage stage, const std::string& name, u32 index) | ||
| 27 | : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {} | ||
| 28 | |||
| 29 | const std::string& GetName() const { | ||
| 30 | return name; | ||
| 31 | } | ||
| 32 | |||
| 33 | Maxwell::ShaderStage GetStage() const { | ||
| 34 | return stage; | ||
| 35 | } | ||
| 36 | |||
| 37 | u32 GetIndex() const { | ||
| 38 | return index; | ||
| 39 | } | ||
| 40 | |||
| 41 | u32 GetHash() const { | ||
| 42 | return (static_cast<u32>(stage) << 16) | index; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | std::string name; | ||
| 47 | Maxwell::ShaderStage stage{}; | ||
| 48 | u32 index{}; | ||
| 49 | }; | ||
| 50 | |||
| 51 | class SamplerEntry : public VideoCommon::Shader::Sampler { | ||
| 52 | public: | ||
| 53 | explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage, | ||
| 54 | const std::string& name) | ||
| 55 | : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {} | ||
| 56 | |||
| 57 | const std::string& GetName() const { | ||
| 58 | return name; | ||
| 59 | } | ||
| 60 | |||
| 61 | Maxwell::ShaderStage GetStage() const { | ||
| 62 | return stage; | ||
| 63 | } | ||
| 64 | |||
| 65 | u32 GetHash() const { | ||
| 66 | return (static_cast<u32>(stage) << 16) | static_cast<u32>(GetIndex()); | ||
| 67 | } | ||
| 68 | |||
| 69 | private: | ||
| 70 | std::string name; | ||
| 71 | Maxwell::ShaderStage stage{}; | ||
| 72 | }; | ||
| 73 | |||
| 74 | struct ShaderEntries { | ||
| 75 | std::vector<ConstBufferEntry> const_buffers; | ||
| 76 | std::vector<SamplerEntry> samplers; | ||
| 77 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 78 | std::size_t shader_length{}; | ||
| 79 | }; | ||
| 80 | |||
| 81 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 18 | 82 | ||
| 19 | std::string GetCommonDeclarations(); | 83 | std::string GetCommonDeclarations(); |
| 20 | 84 | ||
| 21 | std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | 85 | ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, |
| 22 | Maxwell3D::Regs::ShaderStage stage, | 86 | const std::string& suffix); |
| 23 | const std::string& suffix); | ||
| 24 | 87 | ||
| 25 | } // namespace OpenGL::GLShader::Decompiler | 88 | } // namespace OpenGL::GLShader \ No newline at end of file |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 5d0819dc5..446d1a93f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -7,22 +7,25 @@ | |||
| 7 | #include "video_core/engines/maxwell_3d.h" | 7 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 8 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 9 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 9 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 10 | 11 | ||
| 11 | namespace OpenGL::GLShader { | 12 | namespace OpenGL::GLShader { |
| 12 | 13 | ||
| 13 | using Tegra::Engines::Maxwell3D; | 14 | using Tegra::Engines::Maxwell3D; |
| 15 | using VideoCommon::Shader::ProgramCode; | ||
| 16 | using VideoCommon::Shader::ShaderIR; | ||
| 14 | 17 | ||
| 15 | static constexpr u32 PROGRAM_OFFSET{10}; | 18 | static constexpr u32 PROGRAM_OFFSET{10}; |
| 16 | 19 | ||
| 17 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { |
| 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 22 | |||
| 18 | std::string out = "#version 430 core\n"; | 23 | std::string out = "#version 430 core\n"; |
| 19 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 24 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 21 | out += "// Shader Unique Id: VS" + id + "\n\n"; | 25 | out += "// Shader Unique Id: VS" + id + "\n\n"; |
| 22 | out += Decompiler::GetCommonDeclarations(); | 26 | out += GetCommonDeclarations(); |
| 23 | 27 | ||
| 24 | out += R"( | 28 | out += R"( |
| 25 | |||
| 26 | layout (location = 0) out vec4 position; | 29 | layout (location = 0) out vec4 position; |
| 27 | 30 | ||
| 28 | layout(std140) uniform vs_config { | 31 | layout(std140) uniform vs_config { |
| @@ -30,40 +33,32 @@ layout(std140) uniform vs_config { | |||
| 30 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 33 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 31 | uvec4 alpha_test; | 34 | uvec4 alpha_test; |
| 32 | }; | 35 | }; |
| 33 | )"; | ||
| 34 | 36 | ||
| 35 | if (setup.IsDualProgram()) { | 37 | )"; |
| 36 | out += "bool exec_vertex_b();\n"; | 38 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); |
| 37 | } | 39 | ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); |
| 38 | |||
| 39 | ProgramResult program = | ||
| 40 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | ||
| 41 | Maxwell3D::Regs::ShaderStage::Vertex, "vertex") | ||
| 42 | .value_or(ProgramResult()); | ||
| 43 | 40 | ||
| 44 | out += program.first; | 41 | out += program.first; |
| 45 | 42 | ||
| 46 | if (setup.IsDualProgram()) { | 43 | if (setup.IsDualProgram()) { |
| 44 | ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | ||
| 47 | ProgramResult program_b = | 45 | ProgramResult program_b = |
| 48 | Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, | 46 | Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); |
| 49 | Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") | 47 | |
| 50 | .value_or(ProgramResult()); | ||
| 51 | out += program_b.first; | 48 | out += program_b.first; |
| 52 | } | 49 | } |
| 53 | 50 | ||
| 54 | out += R"( | 51 | out += R"( |
| 55 | |||
| 56 | void main() { | 52 | void main() { |
| 57 | position = vec4(0.0, 0.0, 0.0, 0.0); | 53 | position = vec4(0.0, 0.0, 0.0, 0.0); |
| 58 | exec_vertex(); | 54 | execute_vertex(); |
| 59 | )"; | 55 | )"; |
| 60 | 56 | ||
| 61 | if (setup.IsDualProgram()) { | 57 | if (setup.IsDualProgram()) { |
| 62 | out += " exec_vertex_b();"; | 58 | out += " execute_vertex_b();"; |
| 63 | } | 59 | } |
| 64 | 60 | ||
| 65 | out += R"( | 61 | out += R"( |
| 66 | |||
| 67 | // Check if the flip stage is VertexB | 62 | // Check if the flip stage is VertexB |
| 68 | // Config pack's second value is flip_stage | 63 | // Config pack's second value is flip_stage |
| 69 | if (config_pack[1] == 1) { | 64 | if (config_pack[1] == 1) { |
| @@ -77,30 +72,20 @@ void main() { | |||
| 77 | if (config_pack[1] == 1) { | 72 | if (config_pack[1] == 1) { |
| 78 | position.w = 1.0; | 73 | position.w = 1.0; |
| 79 | } | 74 | } |
| 80 | } | 75 | })"; |
| 81 | |||
| 82 | )"; | ||
| 83 | 76 | ||
| 84 | return {out, program.second}; | 77 | return {out, program.second}; |
| 85 | } | 78 | } |
| 86 | 79 | ||
| 87 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | 80 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { |
| 88 | // Version is intentionally skipped in shader generation, it's added by the lazy compilation. | 81 | // Version is intentionally skipped in shader generation, it's added by the lazy compilation. |
| 89 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 90 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 82 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 83 | |||
| 84 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 91 | out += "// Shader Unique Id: GS" + id + "\n\n"; | 85 | out += "// Shader Unique Id: GS" + id + "\n\n"; |
| 92 | out += Decompiler::GetCommonDeclarations(); | 86 | out += GetCommonDeclarations(); |
| 93 | out += "bool exec_geometry();\n"; | ||
| 94 | 87 | ||
| 95 | ProgramResult program = | ||
| 96 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | ||
| 97 | Maxwell3D::Regs::ShaderStage::Geometry, "geometry") | ||
| 98 | .value_or(ProgramResult()); | ||
| 99 | out += R"( | 88 | out += R"( |
| 100 | out gl_PerVertex { | ||
| 101 | vec4 gl_Position; | ||
| 102 | }; | ||
| 103 | |||
| 104 | layout (location = 0) in vec4 gs_position[]; | 89 | layout (location = 0) in vec4 gs_position[]; |
| 105 | layout (location = 0) out vec4 position; | 90 | layout (location = 0) out vec4 position; |
| 106 | 91 | ||
| @@ -110,36 +95,37 @@ layout (std140) uniform gs_config { | |||
| 110 | uvec4 alpha_test; | 95 | uvec4 alpha_test; |
| 111 | }; | 96 | }; |
| 112 | 97 | ||
| 113 | void main() { | ||
| 114 | exec_geometry(); | ||
| 115 | } | ||
| 116 | |||
| 117 | )"; | 98 | )"; |
| 99 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 100 | ProgramResult program = | ||
| 101 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | ||
| 118 | out += program.first; | 102 | out += program.first; |
| 103 | |||
| 104 | out += R"( | ||
| 105 | void main() { | ||
| 106 | execute_geometry(); | ||
| 107 | };)"; | ||
| 108 | |||
| 119 | return {out, program.second}; | 109 | return {out, program.second}; |
| 120 | } | 110 | } |
| 121 | 111 | ||
| 122 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | 112 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { |
| 113 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 114 | |||
| 123 | std::string out = "#version 430 core\n"; | 115 | std::string out = "#version 430 core\n"; |
| 124 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 116 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 125 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 126 | out += "// Shader Unique Id: FS" + id + "\n\n"; | 117 | out += "// Shader Unique Id: FS" + id + "\n\n"; |
| 127 | out += Decompiler::GetCommonDeclarations(); | 118 | out += GetCommonDeclarations(); |
| 128 | out += "bool exec_fragment();\n"; | ||
| 129 | 119 | ||
| 130 | ProgramResult program = | ||
| 131 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | ||
| 132 | Maxwell3D::Regs::ShaderStage::Fragment, "fragment") | ||
| 133 | .value_or(ProgramResult()); | ||
| 134 | out += R"( | 120 | out += R"( |
| 135 | layout(location = 0) out vec4 FragColor0; | 121 | layout (location = 0) out vec4 FragColor0; |
| 136 | layout(location = 1) out vec4 FragColor1; | 122 | layout (location = 1) out vec4 FragColor1; |
| 137 | layout(location = 2) out vec4 FragColor2; | 123 | layout (location = 2) out vec4 FragColor2; |
| 138 | layout(location = 3) out vec4 FragColor3; | 124 | layout (location = 3) out vec4 FragColor3; |
| 139 | layout(location = 4) out vec4 FragColor4; | 125 | layout (location = 4) out vec4 FragColor4; |
| 140 | layout(location = 5) out vec4 FragColor5; | 126 | layout (location = 5) out vec4 FragColor5; |
| 141 | layout(location = 6) out vec4 FragColor6; | 127 | layout (location = 6) out vec4 FragColor6; |
| 142 | layout(location = 7) out vec4 FragColor7; | 128 | layout (location = 7) out vec4 FragColor7; |
| 143 | 129 | ||
| 144 | layout (location = 0) in vec4 position; | 130 | layout (location = 0) in vec4 position; |
| 145 | 131 | ||
| @@ -173,12 +159,20 @@ bool AlphaFunc(in float value) { | |||
| 173 | } | 159 | } |
| 174 | } | 160 | } |
| 175 | 161 | ||
| 162 | )"; | ||
| 163 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 164 | ProgramResult program = | ||
| 165 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 166 | |||
| 167 | out += program.first; | ||
| 168 | |||
| 169 | out += R"( | ||
| 176 | void main() { | 170 | void main() { |
| 177 | exec_fragment(); | 171 | execute_fragment(); |
| 178 | } | 172 | } |
| 179 | 173 | ||
| 180 | )"; | 174 | )"; |
| 181 | out += program.first; | ||
| 182 | return {out, program.second}; | 175 | return {out, program.second}; |
| 183 | } | 176 | } |
| 184 | } // namespace OpenGL::GLShader | 177 | |
| 178 | } // namespace OpenGL::GLShader \ No newline at end of file | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fcc20d3b4..ac5e6917b 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -10,164 +10,12 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 13 | 15 | ||
| 14 | namespace OpenGL::GLShader { | 16 | namespace OpenGL::GLShader { |
| 15 | 17 | ||
| 16 | constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | 18 | using VideoCommon::Shader::ProgramCode; |
| 17 | using ProgramCode = std::vector<u64>; | ||
| 18 | |||
| 19 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | ||
| 20 | |||
| 21 | class ConstBufferEntry { | ||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 23 | |||
| 24 | public: | ||
| 25 | void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) { | ||
| 26 | is_used = true; | ||
| 27 | this->index = static_cast<unsigned>(index); | ||
| 28 | this->stage = stage; | ||
| 29 | max_offset = std::max(max_offset, static_cast<unsigned>(offset)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) { | ||
| 33 | is_used = true; | ||
| 34 | is_indirect = true; | ||
| 35 | this->index = static_cast<unsigned>(index); | ||
| 36 | this->stage = stage; | ||
| 37 | } | ||
| 38 | |||
| 39 | bool IsUsed() const { | ||
| 40 | return is_used; | ||
| 41 | } | ||
| 42 | |||
| 43 | bool IsIndirect() const { | ||
| 44 | return is_indirect; | ||
| 45 | } | ||
| 46 | |||
| 47 | unsigned GetIndex() const { | ||
| 48 | return index; | ||
| 49 | } | ||
| 50 | |||
| 51 | unsigned GetSize() const { | ||
| 52 | return max_offset + 1; | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string GetName() const { | ||
| 56 | return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index); | ||
| 57 | } | ||
| 58 | |||
| 59 | u32 GetHash() const { | ||
| 60 | return (static_cast<u32>(stage) << 16) | index; | ||
| 61 | } | ||
| 62 | |||
| 63 | private: | ||
| 64 | static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = { | ||
| 65 | "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c", | ||
| 66 | }; | ||
| 67 | |||
| 68 | bool is_used{}; | ||
| 69 | bool is_indirect{}; | ||
| 70 | unsigned index{}; | ||
| 71 | unsigned max_offset{}; | ||
| 72 | Maxwell::ShaderStage stage; | ||
| 73 | }; | ||
| 74 | |||
| 75 | class SamplerEntry { | ||
| 76 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 77 | |||
| 78 | public: | ||
| 79 | SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, | ||
| 80 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow) | ||
| 81 | : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array), | ||
| 82 | is_shadow(is_shadow) {} | ||
| 83 | |||
| 84 | std::size_t GetOffset() const { | ||
| 85 | return offset; | ||
| 86 | } | ||
| 87 | |||
| 88 | std::size_t GetIndex() const { | ||
| 89 | return sampler_index; | ||
| 90 | } | ||
| 91 | |||
| 92 | Maxwell::ShaderStage GetStage() const { | ||
| 93 | return stage; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::string GetName() const { | ||
| 97 | return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' + | ||
| 98 | std::to_string(sampler_index); | ||
| 99 | } | ||
| 100 | |||
| 101 | std::string GetTypeString() const { | ||
| 102 | using Tegra::Shader::TextureType; | ||
| 103 | std::string glsl_type; | ||
| 104 | |||
| 105 | switch (type) { | ||
| 106 | case TextureType::Texture1D: | ||
| 107 | glsl_type = "sampler1D"; | ||
| 108 | break; | ||
| 109 | case TextureType::Texture2D: | ||
| 110 | glsl_type = "sampler2D"; | ||
| 111 | break; | ||
| 112 | case TextureType::Texture3D: | ||
| 113 | glsl_type = "sampler3D"; | ||
| 114 | break; | ||
| 115 | case TextureType::TextureCube: | ||
| 116 | glsl_type = "samplerCube"; | ||
| 117 | break; | ||
| 118 | default: | ||
| 119 | UNIMPLEMENTED(); | ||
| 120 | } | ||
| 121 | if (is_array) | ||
| 122 | glsl_type += "Array"; | ||
| 123 | if (is_shadow) | ||
| 124 | glsl_type += "Shadow"; | ||
| 125 | return glsl_type; | ||
| 126 | } | ||
| 127 | |||
| 128 | Tegra::Shader::TextureType GetType() const { | ||
| 129 | return type; | ||
| 130 | } | ||
| 131 | |||
| 132 | bool IsArray() const { | ||
| 133 | return is_array; | ||
| 134 | } | ||
| 135 | |||
| 136 | bool IsShadow() const { | ||
| 137 | return is_shadow; | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetHash() const { | ||
| 141 | return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | static std::string GetArrayName(Maxwell::ShaderStage stage) { | ||
| 145 | return TextureSamplerNames[static_cast<std::size_t>(stage)]; | ||
| 146 | } | ||
| 147 | |||
| 148 | private: | ||
| 149 | static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = { | ||
| 150 | "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs", | ||
| 151 | }; | ||
| 152 | |||
| 153 | /// Offset in TSC memory from which to read the sampler object, as specified by the sampling | ||
| 154 | /// instruction. | ||
| 155 | std::size_t offset; | ||
| 156 | Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. | ||
| 157 | std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. | ||
| 158 | Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) | ||
| 159 | bool is_array; ///< Whether the texture is being sampled as an array texture or not. | ||
| 160 | bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not. | ||
| 161 | }; | ||
| 162 | |||
| 163 | struct ShaderEntries { | ||
| 164 | std::vector<ConstBufferEntry> const_buffer_entries; | ||
| 165 | std::vector<SamplerEntry> texture_samplers; | ||
| 166 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances; | ||
| 167 | std::size_t shader_length; | ||
| 168 | }; | ||
| 169 | |||
| 170 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 171 | 19 | ||
| 172 | struct ShaderSetup { | 20 | struct ShaderSetup { |
| 173 | explicit ShaderSetup(ProgramCode program_code) { | 21 | explicit ShaderSetup(ProgramCode program_code) { |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp new file mode 100644 index 000000000..6fdcac784 --- /dev/null +++ b/src/video_core/shader/decode.cpp | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <set> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/engines/shader_header.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | |||
| 18 | using Tegra::Shader::Instruction; | ||
| 19 | using Tegra::Shader::OpCode; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | /// Merges exit method of two parallel branches. | ||
| 24 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 25 | if (a == ExitMethod::Undetermined) { | ||
| 26 | return b; | ||
| 27 | } | ||
| 28 | if (b == ExitMethod::Undetermined) { | ||
| 29 | return a; | ||
| 30 | } | ||
| 31 | if (a == b) { | ||
| 32 | return a; | ||
| 33 | } | ||
| 34 | return ExitMethod::Conditional; | ||
| 35 | } | ||
| 36 | |||
| 37 | /** | ||
| 38 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 39 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 40 | */ | ||
| 41 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 42 | constexpr u32 SchedPeriod = 4; | ||
| 43 | u32 absolute_offset = offset - main_offset; | ||
| 44 | |||
| 45 | return (absolute_offset % SchedPeriod) == 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace | ||
| 49 | |||
| 50 | void ShaderIR::Decode() { | ||
| 51 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 52 | |||
| 53 | std::set<u32> labels; | ||
| 54 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | ||
| 55 | if (exit_method != ExitMethod::AlwaysEnd) { | ||
| 56 | UNREACHABLE_MSG("Program does not always end"); | ||
| 57 | } | ||
| 58 | |||
| 59 | if (labels.empty()) { | ||
| 60 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | ||
| 61 | return; | ||
| 62 | } | ||
| 63 | |||
| 64 | labels.insert(main_offset); | ||
| 65 | |||
| 66 | for (const u32 label : labels) { | ||
| 67 | const auto next_it = labels.lower_bound(label + 1); | ||
| 68 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | ||
| 69 | |||
| 70 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 75 | const auto [iter, inserted] = | ||
| 76 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 77 | ExitMethod& exit_method = iter->second; | ||
| 78 | if (!inserted) | ||
| 79 | return exit_method; | ||
| 80 | |||
| 81 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 82 | coverage_begin = std::min(coverage_begin, offset); | ||
| 83 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 84 | |||
| 85 | const Instruction instr = {program_code[offset]}; | ||
| 86 | const auto opcode = OpCode::Decode(instr); | ||
| 87 | if (!opcode) | ||
| 88 | continue; | ||
| 89 | switch (opcode->get().GetId()) { | ||
| 90 | case OpCode::Id::EXIT: { | ||
| 91 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 92 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 93 | // and check the exit method of that other basic block. | ||
| 94 | using Tegra::Shader::Pred; | ||
| 95 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 96 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 97 | } else { | ||
| 98 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 99 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | case OpCode::Id::BRA: { | ||
| 103 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 104 | labels.insert(target); | ||
| 105 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||
| 106 | const ExitMethod jmp = Scan(target, end, labels); | ||
| 107 | return exit_method = ParallelExit(no_jmp, jmp); | ||
| 108 | } | ||
| 109 | case OpCode::Id::SSY: | ||
| 110 | case OpCode::Id::PBK: { | ||
| 111 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 112 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 113 | "Constant buffer branching is not supported"); | ||
| 114 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 115 | labels.insert(target); | ||
| 116 | // Continue scanning for an exit method. | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | } | ||
| 121 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 122 | } | ||
| 123 | |||
| 124 | BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 125 | BasicBlock basic_block; | ||
| 126 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 127 | pc = DecodeInstr(basic_block, pc); | ||
| 128 | } | ||
| 129 | return std::move(basic_block); | ||
| 130 | } | ||
| 131 | |||
| 132 | u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { | ||
| 133 | // Ignore sched instructions when generating code. | ||
| 134 | if (IsSchedInstruction(pc, main_offset)) { | ||
| 135 | return pc + 1; | ||
| 136 | } | ||
| 137 | |||
| 138 | const Instruction instr = {program_code[pc]}; | ||
| 139 | const auto opcode = OpCode::Decode(instr); | ||
| 140 | |||
| 141 | // Decoding failure | ||
| 142 | if (!opcode) { | ||
| 143 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 144 | return pc + 1; | ||
| 145 | } | ||
| 146 | |||
| 147 | bb.push_back( | ||
| 148 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | ||
| 149 | |||
| 150 | using Tegra::Shader::Pred; | ||
| 151 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 152 | "NeverExecute predicate not implemented"); | ||
| 153 | |||
| 154 | static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)> | ||
| 155 | decoders = { | ||
| 156 | {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, | ||
| 157 | {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, | ||
| 158 | {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, | ||
| 159 | {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, | ||
| 160 | {OpCode::Type::Shift, &ShaderIR::DecodeShift}, | ||
| 161 | {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, | ||
| 162 | {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, | ||
| 163 | {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, | ||
| 164 | {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, | ||
| 165 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | ||
| 166 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | ||
| 167 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | ||
| 168 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | ||
| 169 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | ||
| 170 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | ||
| 171 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | ||
| 172 | {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, | ||
| 173 | {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, | ||
| 174 | {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, | ||
| 175 | {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, | ||
| 176 | {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, | ||
| 177 | {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, | ||
| 178 | {OpCode::Type::Video, &ShaderIR::DecodeVideo}, | ||
| 179 | {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, | ||
| 180 | }; | ||
| 181 | |||
| 182 | std::vector<Node> tmp_block; | ||
| 183 | if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { | ||
| 184 | pc = (this->*decoder->second)(tmp_block, bb, pc); | ||
| 185 | } else { | ||
| 186 | pc = DecodeOther(tmp_block, bb, pc); | ||
| 187 | } | ||
| 188 | |||
| 189 | // Some instructions (like SSY) don't have a predicate field, they are always unconditionally | ||
| 190 | // executed. | ||
| 191 | const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 192 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 193 | |||
| 194 | if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { | ||
| 195 | bb.push_back( | ||
| 196 | Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); | ||
| 197 | } else { | ||
| 198 | for (auto& node : tmp_block) { | ||
| 199 | bb.push_back(std::move(node)); | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | return pc + 1; | ||
| 204 | } | ||
| 205 | |||
| 206 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp new file mode 100644 index 000000000..e7847f614 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -0,0 +1,155 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::SubOp; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | Node op_b = [&]() -> Node { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::MOV_C: | ||
| 34 | case OpCode::Id::MOV_R: { | ||
| 35 | // MOV does not have neither 'abs' nor 'neg' bits. | ||
| 36 | SetRegister(bb, instr.gpr0, op_b); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::FMUL_C: | ||
| 40 | case OpCode::Id::FMUL_R: | ||
| 41 | case OpCode::Id::FMUL_IMM: { | ||
| 42 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 43 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", | ||
| 44 | instr.fmul.tab5cb8_2.Value()); | ||
| 45 | UNIMPLEMENTED_IF_MSG( | ||
| 46 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | ||
| 47 | instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default | ||
| 48 | |||
| 49 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 50 | |||
| 51 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 52 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 53 | |||
| 54 | if (instr.fmul.postfactor != 0) { | ||
| 55 | auto postfactor = static_cast<s32>(instr.fmul.postfactor); | ||
| 56 | |||
| 57 | // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below | ||
| 58 | // logic. | ||
| 59 | if (postfactor >= 4) { | ||
| 60 | postfactor = 7 - postfactor; | ||
| 61 | } else { | ||
| 62 | postfactor = 0 - postfactor; | ||
| 63 | } | ||
| 64 | |||
| 65 | if (postfactor > 0) { | ||
| 66 | value = Operation(OperationCode::FMul, NO_PRECISE, value, | ||
| 67 | Immediate(static_cast<f32>(1 << postfactor))); | ||
| 68 | } else { | ||
| 69 | value = Operation(OperationCode::FDiv, NO_PRECISE, value, | ||
| 70 | Immediate(static_cast<f32>(1 << -postfactor))); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", | ||
| 114 | static_cast<unsigned>(instr.sub_op.Value())); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | }(); | ||
| 118 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 119 | |||
| 120 | SetRegister(bb, instr.gpr0, value); | ||
| 121 | break; | ||
| 122 | } | ||
| 123 | case OpCode::Id::FMNMX_C: | ||
| 124 | case OpCode::Id::FMNMX_R: | ||
| 125 | case OpCode::Id::FMNMX_IMM: { | ||
| 126 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 127 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 128 | |||
| 129 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 130 | |||
| 131 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 133 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 134 | |||
| 135 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 136 | SetRegister(bb, instr.gpr0, value); | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | case OpCode::Id::RRO_C: | ||
| 140 | case OpCode::Id::RRO_R: | ||
| 141 | case OpCode::Id::RRO_IMM: { | ||
| 142 | // Currently RRO is only implemented as a register move. | ||
| 143 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 144 | SetRegister(bb, instr.gpr0, op_b); | ||
| 145 | LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); | ||
| 146 | break; | ||
| 147 | } | ||
| 148 | default: | ||
| 149 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 150 | } | ||
| 151 | |||
| 152 | return pc; | ||
| 153 | } | ||
| 154 | |||
| 155 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp new file mode 100644 index 000000000..a237dcb92 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half.cpp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | ||
| 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | ||
| 21 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | ||
| 22 | } | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); | ||
| 24 | |||
| 25 | const bool negate_a = | ||
| 26 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | ||
| 27 | const bool negate_b = | ||
| 28 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | ||
| 29 | |||
| 30 | const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); | ||
| 31 | |||
| 32 | // instr.alu_half.type_a | ||
| 33 | |||
| 34 | Node op_b = [&]() { | ||
| 35 | switch (opcode->get().GetId()) { | ||
| 36 | case OpCode::Id::HADD2_C: | ||
| 37 | case OpCode::Id::HMUL2_C: | ||
| 38 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 39 | case OpCode::Id::HADD2_R: | ||
| 40 | case OpCode::Id::HMUL2_R: | ||
| 41 | return GetRegister(instr.gpr20); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | ||
| 48 | |||
| 49 | Node value = [&]() { | ||
| 50 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; | ||
| 51 | switch (opcode->get().GetId()) { | ||
| 52 | case OpCode::Id::HADD2_C: | ||
| 53 | case OpCode::Id::HADD2_R: | ||
| 54 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | ||
| 55 | case OpCode::Id::HMUL2_C: | ||
| 56 | case OpCode::Id::HMUL2_R: | ||
| 57 | return Operation(OperationCode::HMul, meta, op_a, op_b); | ||
| 58 | default: | ||
| 59 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 60 | return Immediate(0); | ||
| 61 | } | ||
| 62 | }(); | ||
| 63 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 64 | |||
| 65 | SetRegister(bb, instr.gpr0, value); | ||
| 66 | |||
| 67 | return pc; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp new file mode 100644 index 000000000..7b4f7d284 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 20 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | ||
| 21 | } else { | ||
| 22 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | ||
| 23 | } | ||
| 24 | UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, | ||
| 25 | "Half float immediate saturation not implemented"); | ||
| 26 | |||
| 27 | Node op_a = GetRegister(instr.gpr8); | ||
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 29 | |||
| 30 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 31 | |||
| 32 | Node value = [&]() { | ||
| 33 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HADD2_IMM: | ||
| 36 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | ||
| 37 | case OpCode::Id::HMUL2_IMM: | ||
| 38 | return Operation(OperationCode::HMul, meta, op_a, op_b); | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return Immediate(0); | ||
| 42 | } | ||
| 43 | }(); | ||
| 44 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 45 | |||
| 46 | SetRegister(bb, instr.gpr0, value); | ||
| 47 | |||
| 48 | return pc; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp new file mode 100644 index 000000000..4fd3db54e --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | switch (opcode->get().GetId()) { | ||
| 20 | case OpCode::Id::MOV32_IMM: { | ||
| 21 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 22 | break; | ||
| 23 | } | ||
| 24 | case OpCode::Id::FMUL32_IMM: { | ||
| 25 | Node value = | ||
| 26 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 27 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 28 | |||
| 29 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 30 | SetRegister(bb, instr.gpr0, value); | ||
| 31 | break; | ||
| 32 | } | ||
| 33 | case OpCode::Id::FADD32I: { | ||
| 34 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 35 | instr.fadd32i.negate_a); | ||
| 36 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 37 | instr.fadd32i.negate_b); | ||
| 38 | |||
| 39 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 40 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 46 | opcode->get().GetName()); | ||
| 47 | } | ||
| 48 | |||
| 49 | return pc; | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp new file mode 100644 index 000000000..4a8cc1a1c --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::IAdd3Height; | ||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::Register; | ||
| 17 | |||
// Decodes one integer-arithmetic instruction at program counter pc, emitting IR into bb.
// Returns the (unmodified) pc; the caller advances it.
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Operand A is always a register; operand B comes from an immediate, a register or a
    // constant buffer depending on the instruction encoding.
    Node op_a = GetRegister(instr.gpr8);
    Node op_b = [&]() {
        if (instr.is_b_imm) {
            return Immediate(instr.alu.GetSignedImm20_20());
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::IADD_C:
    case OpCode::Id::IADD_R:
    case OpCode::Id::IADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");

        // IADD only supports negation modifiers (no absolute value).
        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);

        // NOTE(review): reads op_32.generates_cc even for the non-32I IADD forms --
        // confirm the bitfields alias the same bit in the encoding.
        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::IADD3_C:
    case OpCode::Id::IADD3_R:
    case OpCode::Id::IADD3_IMM: {
        // Three-operand add: gpr0 = op_a + op_b + op_c, with optional half-word selection
        // (register form only) and an intermediate shift between the two additions.
        Node op_c = GetRegister(instr.gpr39);

        // Selects the whole word or one of its 16 bit halves, as encoded per operand.
        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
            switch (height) {
            case IAdd3Height::None:
                return value;
            case IAdd3Height::LowerHalfWord:
                return BitfieldExtract(value, 0, 16);
            case IAdd3Height::UpperHalfWord:
                return BitfieldExtract(value, 16, 16);
            default:
                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
                return Immediate(0);
            }
        };

        // Height selection is only present in the register-operand encoding.
        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
        }

        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);

        const Node value = [&]() {
            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
            }
            // Register form may shift the partial sum by 16 before adding op_c.
            const Node shifted = [&]() {
                switch (instr.iadd3.mode) {
                case Tegra::Shader::IAdd3Mode::RightShift:
                    // TODO(tech4me): According to
                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
                    // The addition between op_a and op_b should be done in uint33, more
                    // investigation required
                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
                                     Immediate(16));
                case Tegra::Shader::IAdd3Mode::LeftShift:
                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
                                     Immediate(16));
                default:
                    return add_ab;
                }
            }();
            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
        }();

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::ISCADD_C:
    case OpCode::Id::ISCADD_R:
    case OpCode::Id::ISCADD_IMM: {
        // Scaled add: gpr0 = (op_a << shift_amount) + op_b.
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in ISCADD is not implemented");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::POPC_C:
    case OpCode::Id::POPC_R:
    case OpCode::Id::POPC_IMM: {
        // Population count of op_b, optionally counting zero bits instead (invert).
        if (instr.popc.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }
        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::SEL_C:
    case OpCode::Id::SEL_R:
    case OpCode::Id::SEL_IMM: {
        // Predicate-driven select: gpr0 = condition ? op_a : op_b.
        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::LOP_C:
    case OpCode::Id::LOP_R:
    case OpCode::Id::LOP_IMM: {
        // Two-input logic op with optional per-operand inversion; result handling
        // (register write, flags, predicate) is shared in WriteLogicOperation.
        if (instr.alu.lop.invert_a)
            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
        if (instr.alu.lop.invert_b)
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);

        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
                            instr.generates_cc);
        break;
    }
    case OpCode::Id::LOP3_C:
    case OpCode::Id::LOP3_R:
    case OpCode::Id::LOP3_IMM: {
        // Arbitrary three-input logic op defined by an 8 bit lookup table. The LUT
        // immediate lives in a different field for the register encoding.
        const Node op_c = GetRegister(instr.gpr39);
        const Node lut = [&]() {
            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
                return Immediate(instr.alu.lop3.GetImmLut28());
            } else {
                return Immediate(instr.alu.lop3.GetImmLut48());
            }
        }();

        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
        break;
    }
    case OpCode::Id::IMNMX_C:
    case OpCode::Id::IMNMX_R:
    case OpCode::Id::IMNMX_IMM: {
        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);

        const bool is_signed = instr.imnmx.is_signed;

        // Computes both min and max, then selects one with the predicate.
        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::LEA_R2:
    case OpCode::Id::LEA_R1:
    case OpCode::Id::LEA_IMM:
    case OpCode::Id::LEA_RZ:
    case OpCode::Id::LEA_HI: {
        // LEA computes op_a + op_b * (1 << op_c); each encoding sources the three
        // operands differently. Note: this op_a/op_b/op_c shadows the outer pair.
        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::LEA_R2: {
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
            }

            case OpCode::Id::LEA_R1: {
                const bool neg = instr.lea.r1.neg != 0;
                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        GetRegister(instr.gpr20),
                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
            }

            case OpCode::Id::LEA_IMM: {
                const bool neg = instr.lea.imm.neg != 0;
                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }

            case OpCode::Id::LEA_RZ: {
                const bool neg = instr.lea.rz.neg != 0;
                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
            }

            case OpCode::Id::LEA_HI:
            default:
                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());

                // Fallback: treat as the immediate encoding so decoding can continue.
                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }
        }();

        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                             "Unhandled LEA Predicate");

        // value = op_a + op_b * (1 << op_c)
        const Node shifted_c =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);

        SetRegister(bb, instr.gpr0, value);

        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 244 | |||
| 245 | void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 246 | Node imm_lut, bool sets_cc) { | ||
| 247 | constexpr u32 lop_iterations = 32; | ||
| 248 | const Node one = Immediate(1); | ||
| 249 | const Node two = Immediate(2); | ||
| 250 | |||
| 251 | Node value{}; | ||
| 252 | for (u32 i = 0; i < lop_iterations; ++i) { | ||
| 253 | const Node shift_amount = Immediate(i); | ||
| 254 | |||
| 255 | const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); | ||
| 256 | const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); | ||
| 257 | |||
| 258 | const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); | ||
| 259 | const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); | ||
| 260 | const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); | ||
| 261 | |||
| 262 | const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); | ||
| 263 | const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); | ||
| 264 | const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); | ||
| 265 | |||
| 266 | const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); | ||
| 267 | const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); | ||
| 268 | |||
| 269 | const Node shifted_bit = | ||
| 270 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); | ||
| 271 | const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); | ||
| 272 | |||
| 273 | const Node right = | ||
| 274 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); | ||
| 275 | |||
| 276 | if (i > 0) { | ||
| 277 | value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); | ||
| 278 | } else { | ||
| 279 | value = right; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | SetInternalFlagsFromInteger(bb, value, sets_cc); | ||
| 284 | SetRegister(bb, dest, value); | ||
| 285 | } | ||
| 286 | |||
| 287 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp new file mode 100644 index 000000000..b26a6e473 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::LogicOperation; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::PredicateResultMode; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 25 | |||
| 26 | switch (opcode->get().GetId()) { | ||
| 27 | case OpCode::Id::IADD32I: { | ||
| 28 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 29 | |||
| 30 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); | ||
| 31 | |||
| 32 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | ||
| 33 | |||
| 34 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | ||
| 35 | SetRegister(bb, instr.gpr0, value); | ||
| 36 | break; | ||
| 37 | } | ||
| 38 | case OpCode::Id::LOP32I: { | ||
| 39 | if (instr.alu.lop32i.invert_a) | ||
| 40 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 41 | |||
| 42 | if (instr.alu.lop32i.invert_b) | ||
| 43 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 44 | |||
| 45 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, | ||
| 46 | PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); | ||
| 47 | break; | ||
| 48 | } | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 51 | opcode->get().GetName()); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, | ||
| 58 | Node op_a, Node op_b, PredicateResultMode predicate_mode, | ||
| 59 | Pred predicate, bool sets_cc) { | ||
| 60 | const Node result = [&]() { | ||
| 61 | switch (logic_op) { | ||
| 62 | case LogicOperation::And: | ||
| 63 | return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); | ||
| 64 | case LogicOperation::Or: | ||
| 65 | return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); | ||
| 66 | case LogicOperation::Xor: | ||
| 67 | return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); | ||
| 68 | case LogicOperation::PassB: | ||
| 69 | return op_b; | ||
| 70 | default: | ||
| 71 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); | ||
| 72 | return Immediate(0); | ||
| 73 | } | ||
| 74 | }(); | ||
| 75 | |||
| 76 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 77 | SetRegister(bb, dest, result); | ||
| 78 | |||
| 79 | // Write the predicate value depending on the predicate mode. | ||
| 80 | switch (predicate_mode) { | ||
| 81 | case PredicateResultMode::None: | ||
| 82 | // Do nothing. | ||
| 83 | return; | ||
| 84 | case PredicateResultMode::NotZero: { | ||
| 85 | // Set the predicate to true if the result is not zero. | ||
| 86 | const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); | ||
| 87 | SetPredicate(bb, static_cast<u64>(predicate), compare); | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | default: | ||
| 91 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", | ||
| 92 | static_cast<u32>(predicate_mode)); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp new file mode 100644 index 000000000..0734141b0 --- /dev/null +++ b/src/video_core/shader/decode/bfe.cpp | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 20 | |||
| 21 | Node op_a = GetRegister(instr.gpr8); | ||
| 22 | op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); | ||
| 23 | |||
| 24 | switch (opcode->get().GetId()) { | ||
| 25 | case OpCode::Id::BFE_IMM: { | ||
| 26 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 27 | "Condition codes generation in BFE is not implemented"); | ||
| 28 | |||
| 29 | const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); | ||
| 30 | const Node outer_shift_imm = | ||
| 31 | Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); | ||
| 32 | |||
| 33 | const Node inner_shift = | ||
| 34 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); | ||
| 35 | const Node outer_shift = | ||
| 36 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); | ||
| 37 | |||
| 38 | SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); | ||
| 39 | SetRegister(bb, instr.gpr0, outer_shift); | ||
| 40 | break; | ||
| 41 | } | ||
| 42 | default: | ||
| 43 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | ||
| 44 | } | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp new file mode 100644 index 000000000..942d6729d --- /dev/null +++ b/src/video_core/shader/decode/bfi.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { | ||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::BFI_IMM_R: | ||
| 22 | return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; | ||
| 23 | default: | ||
| 24 | UNREACHABLE(); | ||
| 25 | return {Immediate(0), Immediate(0)}; | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | const Node insert = GetRegister(instr.gpr8); | ||
| 29 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 30 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 31 | |||
| 32 | const Node value = | ||
| 33 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 36 | SetRegister(bb, instr.gpr0, value); | ||
| 37 | |||
| 38 | return pc; | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp new file mode 100644 index 000000000..ee18d3a99 --- /dev/null +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Register; | ||
| 15 | |||
// Decodes one type-conversion instruction (I2I/I2F/F2F/F2I) at pc, emitting IR into bb.
// Returns the (unmodified) pc; the caller advances it.
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::I2I_R: {
        // Integer to integer: resize, apply abs/neg, then reinterpret signedness if
        // input and output signedness differ.
        UNIMPLEMENTED_IF(instr.conversion.selector);

        const bool input_signed = instr.conversion.is_input_signed;
        const bool output_signed = instr.conversion.is_output_signed;

        Node value = GetRegister(instr.gpr20);
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);

        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
                                        input_signed);
        if (input_signed != output_signed) {
            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C: {
        // Integer to float. Note the modifier split: abs is applied to the integer
        // input, while negate is applied to the float result.
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.selector);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

        // Operand comes from a register or a constant buffer depending on the encoding.
        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
            }
        }();
        const bool input_signed = instr.conversion.is_input_signed;
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C: {
        // Float to float: apply abs/neg, the encoded rounding mode, then saturation.
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");

        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
            }
        }();

        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        value = [&]() {
            switch (instr.conversion.f2f.rounding) {
            case Tegra::Shader::F2fRoundingOp::None:
                return value;
            case Tegra::Shader::F2fRoundingOp::Round:
                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            }
            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
            return Immediate(0);
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C: {
        // Float to integer: apply abs/neg, round per the encoded mode, cast to the
        // output signedness and resize to the destination register size.
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");
        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
            }
        }();

        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        value = [&]() {
            switch (instr.conversion.f2i.rounding) {
            case Tegra::Shader::F2iRoundingOp::None:
                return value;
            case Tegra::Shader::F2iRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            default:
                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                                  static_cast<u32>(instr.conversion.f2i.rounding.Value()));
                return Immediate(0);
            }
        }();
        const bool is_signed = instr.conversion.is_output_signed;
        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
        value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 148 | |||
| 149 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/video_core/shader/decode/decode_integer_set.cpp | |||
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp new file mode 100644 index 000000000..be8dc2230 --- /dev/null +++ b/src/video_core/shader/decode/ffma.cpp | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | ||
| 21 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | ||
| 22 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | ||
| 23 | instr.ffma.tab5980_1.Value()); | ||
| 24 | |||
| 25 | const Node op_a = GetRegister(instr.gpr8); | ||
| 26 | |||
| 27 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 28 | switch (opcode->get().GetId()) { | ||
| 29 | case OpCode::Id::FFMA_CR: { | ||
| 30 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), | ||
| 31 | GetRegister(instr.gpr39)}; | ||
| 32 | } | ||
| 33 | case OpCode::Id::FFMA_RR: | ||
| 34 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 35 | case OpCode::Id::FFMA_RC: { | ||
| 36 | return {GetRegister(instr.gpr39), | ||
| 37 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; | ||
| 38 | } | ||
| 39 | case OpCode::Id::FFMA_IMM: | ||
| 40 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 41 | default: | ||
| 42 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 43 | return {Immediate(0), Immediate(0)}; | ||
| 44 | } | ||
| 45 | }(); | ||
| 46 | |||
| 47 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 48 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 49 | |||
| 50 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 51 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 52 | |||
| 53 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 54 | SetRegister(bb, instr.gpr0, value); | ||
| 55 | |||
| 56 | return pc; | ||
| 57 | } | ||
| 58 | |||
| 59 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp new file mode 100644 index 000000000..ba846f1bd --- /dev/null +++ b/src/video_core/shader/decode/float_set.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 35 | // condition is true, and to 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp new file mode 100644 index 000000000..e88b04d18 --- /dev/null +++ b/src/video_core/shader/decode/float_set_predicate.cpp | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
/// Decodes FSETP: compares two float operands, combines the comparison with a
/// second predicate, and writes the result into one or two destination
/// predicates.
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Operand A is gpr8 with its absolute/negate modifiers applied.
    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                            instr.fsetp.neg_a != 0);
    // Operand B source depends on the encoding: immediate, register or cbuf.
    Node op_b = [&]() {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        }
    }();
    // NOTE(review): only abs_b is applied to operand B; negation is passed as
    // false here. Confirm against the FSETP encoding whether a neg_b bit
    // exists and should be honored.
    op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);

    // We can't use the constant predicate as destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
    const Node value = Operation(combiner, predicate, second_pred);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    SetPredicate(bb, instr.fsetp.pred3, value);

    if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
        // if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
        const Node second_value = Operation(combiner, negated_pred, second_pred);
        SetPredicate(bb, instr.fsetp.pred0, second_value);
    }

    return pc;
}
| 55 | |||
| 56 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp new file mode 100644 index 000000000..dfd7cb98f --- /dev/null +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
/// Decodes HSET2: evaluates the condition on each half of two packed
/// half-float operands and writes a packed result (per-half 1.0h or all-ones
/// mask, depending on bf) into gpr0.
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Flush-to-zero mode is not implemented.
    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);

    // instr.hset2.type_a
    // instr.hset2.type_b
    Node op_a = GetRegister(instr.gpr8);
    Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSET2_R:
            // Only the register-register variant is handled here.
            return GetRegister(instr.gpr20);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);

    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);

    // The comparison evaluates both halves at once; LogicalPick2 below
    // extracts the boolean result of each half.
    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);

    // HSET2 operates on each half float in the pack.
    std::array<Node, 2> values;
    for (u32 i = 0; i < 2; ++i) {
        // True result per half: 0x3c00 (1.0 as half) when bf is set, all bits
        // of that half (0xffff) otherwise; shifted into the half's position.
        const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
        const Node true_value = Immediate(raw_value << (i * 16));
        const Node false_value = Immediate(0);

        const Node comparison =
            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
        const Node predicate = Operation(combiner, comparison, second_pred);

        values[i] =
            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
    }

    // Merge the two per-half results into the final packed register value.
    const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
| 66 | |||
| 67 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp new file mode 100644 index 000000000..53c44ae5a --- /dev/null +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
/// Decodes HSETP2: compares two packed half-float operands, reduces the
/// per-half results (h_and), combines with a second predicate, and writes one
/// or two destination predicates.
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Flush-to-zero mode is not implemented.
    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);

    Node op_a = GetRegister(instr.gpr8);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    const Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSETP2_R:
            // NOTE(review): abs_a is applied to operand B here while negate_b
            // is used for negation — this looks like a copy-paste slip (an
            // operand-B abs flag would be expected). Confirm against the
            // HSETP2 encoding in shader_bytecode.h.
            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
                                        instr.hsetp2.negate_b);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // We can't use the constant predicate as destination.
    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    // h_and selects whether both halves (All2) or either half (Any2) of the
    // packed comparison must hold.
    const OperationCode pair_combiner =
        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;

    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
    const Node first_pred = Operation(pair_combiner, comparison);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    const Node value = Operation(combiner, first_pred, second_pred);
    SetPredicate(bb, instr.hsetp2.pred3, value);

    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
    }

    return pc;
}
| 61 | |||
| 62 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp new file mode 100644 index 000000000..4a6b945f9 --- /dev/null +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfPrecision; | ||
| 15 | using Tegra::Shader::HalfType; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
/// Decodes HFMA2: packed half-float fused multiply-add
/// (gpr0 = op_a * op_b + op_c), with operand sources, types and negation
/// flags that vary per instruction variant.
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Only the default precision mode is implemented; the RR variant keeps its
    // precision bits in a different encoding location.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
    }

    // Identity swizzle: use both halves as-is.
    constexpr auto identity = HalfType::H0_H1;

    const HalfType type_a = instr.hfma2.type_a;
    const Node op_a = GetRegister(instr.gpr8);

    // Each variant stores the B/C negation flags in different fields, so the
    // selector lambda writes them out through these locals.
    bool neg_b{}, neg_c{};
    auto [saturate, type_b, op_b, type_c,
          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HFMA2_CR:
            // B from constant buffer, C from gpr39.
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_b,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
                    GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            // B from gpr39, C from constant buffer.
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
        case OpCode::Id::HFMA2_RR:
            // B from gpr20, C from gpr39; flags come from the rr sub-encoding.
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_IMM_R:
            // B is a packed half immediate (no negate flag), C from gpr39.
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        default:
            // Unknown variant: fall back to inert operands.
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();
    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    // Only negation applies to HFMA2 operands (no absolute value).
    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);

    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
    // Combine the result with the previous gpr0 contents per the merge mode.
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
| 75 | |||
| 76 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp new file mode 100644 index 000000000..85e67b03b --- /dev/null +++ b/src/video_core/shader/decode/integer_set.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetRegister(instr.gpr8); | ||
| 20 | const Node op_b = [&]() { | ||
| 21 | if (instr.is_b_imm) { | ||
| 22 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 23 | } else if (instr.is_b_gpr) { | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | } else { | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 27 | } | ||
| 28 | }(); | ||
| 29 | |||
| 30 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition | ||
| 31 | // is true, and to 0 otherwise. | ||
| 32 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 33 | const Node first_pred = | ||
| 34 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 35 | |||
| 36 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 37 | |||
| 38 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 39 | |||
| 40 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 41 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 42 | const Node value = | ||
| 43 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 44 | |||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | |||
| 47 | return pc; | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp new file mode 100644 index 000000000..c8b105a08 --- /dev/null +++ b/src/video_core/shader/decode/integer_set_predicate.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // We can't use the constant predicate as destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp new file mode 100644 index 000000000..ae71672d6 --- /dev/null +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -0,0 +1,688 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Shader::Attribute; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | using Tegra::Shader::TextureMiscMode; | ||
| 20 | using Tegra::Shader::TextureProcessMode; | ||
| 21 | using Tegra::Shader::TextureType; | ||
| 22 | |||
| 23 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 24 | switch (texture_type) { | ||
| 25 | case TextureType::Texture1D: | ||
| 26 | return 1; | ||
| 27 | case TextureType::Texture2D: | ||
| 28 | return 2; | ||
| 29 | case TextureType::Texture3D: | ||
| 30 | case TextureType::TextureCube: | ||
| 31 | return 3; | ||
| 32 | default: | ||
| 33 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 39 | const Instruction instr = {program_code[pc]}; | ||
| 40 | const auto opcode = OpCode::Decode(instr); | ||
| 41 | |||
| 42 | switch (opcode->get().GetId()) { | ||
| 43 | case OpCode::Id::LD_A: { | ||
| 44 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 45 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 46 | "Indirect attribute loads are not supported"); | ||
| 47 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 48 | "Unaligned attribute loads are not supported"); | ||
| 49 | |||
| 50 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 51 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 52 | |||
| 53 | u64 next_element = instr.attribute.fmt20.element; | ||
| 54 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 55 | |||
| 56 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 57 | const Node buffer = GetRegister(instr.gpr39); | ||
| 58 | const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 59 | next_element, input_mode, buffer); | ||
| 60 | |||
| 61 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 62 | |||
| 63 | // Load the next attribute element into the following register. If the element | ||
| 64 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 65 | // attribute. | ||
| 66 | next_element = (next_element + 1) % 4; | ||
| 67 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 68 | }; | ||
| 69 | |||
| 70 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 71 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 72 | LoadNextElement(reg_offset); | ||
| 73 | } | ||
| 74 | break; | ||
| 75 | } | ||
| 76 | case OpCode::Id::LD_C: { | ||
| 77 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 78 | |||
| 79 | Node index = GetRegister(instr.gpr8); | ||
| 80 | |||
| 81 | const Node op_a = | ||
| 82 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index); | ||
| 83 | |||
| 84 | switch (instr.ld_c.type.Value()) { | ||
| 85 | case Tegra::Shader::UniformType::Single: | ||
| 86 | SetRegister(bb, instr.gpr0, op_a); | ||
| 87 | break; | ||
| 88 | |||
| 89 | case Tegra::Shader::UniformType::Double: { | ||
| 90 | const Node op_b = | ||
| 91 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); | ||
| 92 | |||
| 93 | SetTemporal(bb, 0, op_a); | ||
| 94 | SetTemporal(bb, 1, op_b); | ||
| 95 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 96 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | default: | ||
| 100 | UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 101 | } | ||
| 102 | break; | ||
| 103 | } | ||
| 104 | case OpCode::Id::LD_L: { | ||
| 105 | UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", | ||
| 106 | static_cast<unsigned>(instr.ld_l.unknown.Value())); | ||
| 107 | |||
| 108 | const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8), | ||
| 109 | Immediate(static_cast<s32>(instr.smem_imm))); | ||
| 110 | const Node lmem = GetLocalMemory(index); | ||
| 111 | |||
| 112 | switch (instr.ldst_sl.type.Value()) { | ||
| 113 | case Tegra::Shader::StoreType::Bytes32: | ||
| 114 | SetRegister(bb, instr.gpr0, lmem); | ||
| 115 | break; | ||
| 116 | default: | ||
| 117 | UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", | ||
| 118 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 119 | } | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::ST_A: { | ||
| 123 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 124 | "Indirect attribute loads are not supported"); | ||
| 125 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 126 | "Unaligned attribute loads are not supported"); | ||
| 127 | |||
| 128 | u64 next_element = instr.attribute.fmt20.element; | ||
| 129 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 130 | |||
| 131 | const auto StoreNextElement = [&](u32 reg_offset) { | ||
| 132 | const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 133 | next_element, GetRegister(instr.gpr39)); | ||
| 134 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 135 | |||
| 136 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 137 | |||
| 138 | // Load the next attribute element into the following register. If the element | ||
| 139 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 140 | // attribute. | ||
| 141 | next_element = (next_element + 1) % 4; | ||
| 142 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 143 | }; | ||
| 144 | |||
| 145 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 146 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 147 | StoreNextElement(reg_offset); | ||
| 148 | } | ||
| 149 | |||
| 150 | break; | ||
| 151 | } | ||
| 152 | case OpCode::Id::ST_L: { | ||
| 153 | UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", | ||
| 154 | static_cast<u32>(instr.st_l.unknown.Value())); | ||
| 155 | |||
| 156 | const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), | ||
| 157 | Immediate(static_cast<s32>(instr.smem_imm))); | ||
| 158 | |||
| 159 | switch (instr.ldst_sl.type.Value()) { | ||
| 160 | case Tegra::Shader::StoreType::Bytes32: | ||
| 161 | SetLocalMemory(bb, index, GetRegister(instr.gpr0)); | ||
| 162 | break; | ||
| 163 | default: | ||
| 164 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | ||
| 165 | static_cast<u32>(instr.ldst_sl.type.Value())); | ||
| 166 | } | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | case OpCode::Id::TEX: { | ||
| 170 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 171 | "AOFFI is not implemented"); | ||
| 172 | |||
| 173 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 174 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 175 | } | ||
| 176 | |||
| 177 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 178 | const bool is_array = instr.tex.array != 0; | ||
| 179 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 180 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 181 | WriteTexInstructionFloat( | ||
| 182 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | case OpCode::Id::TEXS: { | ||
| 186 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 187 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 188 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 189 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 190 | |||
| 191 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 192 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 193 | } | ||
| 194 | |||
| 195 | const Node4 components = | ||
| 196 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 197 | |||
| 198 | if (instr.texs.fp32_flag) { | ||
| 199 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 200 | } else { | ||
| 201 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 202 | } | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | case OpCode::Id::TLD4: { | ||
| 206 | ASSERT(instr.tld4.array == 0); | ||
| 207 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 208 | "AOFFI is not implemented"); | ||
| 209 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 210 | "NDV is not implemented"); | ||
| 211 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 212 | "PTP is not implemented"); | ||
| 213 | |||
| 214 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 215 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 216 | } | ||
| 217 | |||
| 218 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 219 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 220 | const bool is_array = instr.tld4.array != 0; | ||
| 221 | WriteTexInstructionFloat(bb, instr, | ||
| 222 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case OpCode::Id::TLD4S: { | ||
| 226 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 227 | "AOFFI is not implemented"); | ||
| 228 | |||
| 229 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 230 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 231 | } | ||
| 232 | |||
| 233 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 234 | const Node op_a = GetRegister(instr.gpr8); | ||
| 235 | const Node op_b = GetRegister(instr.gpr20); | ||
| 236 | |||
| 237 | std::vector<Node> coords; | ||
| 238 | |||
| 239 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 240 | if (depth_compare) { | ||
| 241 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 242 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 243 | coords.push_back(op_a); | ||
| 244 | coords.push_back(op_y); | ||
| 245 | coords.push_back(op_b); | ||
| 246 | } else { | ||
| 247 | coords.push_back(op_a); | ||
| 248 | coords.push_back(op_b); | ||
| 249 | } | ||
| 250 | const auto num_coords = static_cast<u32>(coords.size()); | ||
| 251 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | ||
| 252 | |||
| 253 | const auto& sampler = | ||
| 254 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 255 | |||
| 256 | Node4 values; | ||
| 257 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 258 | auto params = coords; | ||
| 259 | MetaTexture meta{sampler, element, num_coords}; | ||
| 260 | values[element] = | ||
| 261 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 262 | } | ||
| 263 | |||
| 264 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 265 | break; | ||
| 266 | } | ||
| 267 | case OpCode::Id::TXQ: { | ||
| 268 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 269 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 270 | } | ||
| 271 | |||
| 272 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 273 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 274 | // uses. This must be fixed at a later instance. | ||
| 275 | const auto& sampler = | ||
| 276 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 277 | |||
| 278 | switch (instr.txq.query_type) { | ||
| 279 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 280 | for (u32 element = 0; element < 4; ++element) { | ||
| 281 | MetaTexture meta{sampler, element}; | ||
| 282 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | ||
| 283 | std::move(meta), GetRegister(instr.gpr8)); | ||
| 284 | SetTemporal(bb, element, value); | ||
| 285 | } | ||
| 286 | for (u32 i = 0; i < 4; ++i) { | ||
| 287 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 288 | } | ||
| 289 | break; | ||
| 290 | } | ||
| 291 | default: | ||
| 292 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 293 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 294 | } | ||
| 295 | break; | ||
| 296 | } | ||
| 297 | case OpCode::Id::TMML: { | ||
| 298 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 299 | "NDV is not implemented"); | ||
| 300 | |||
| 301 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 302 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 303 | } | ||
| 304 | |||
| 305 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 306 | const bool is_array = instr.tmml.array != 0; | ||
| 307 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 308 | |||
| 309 | std::vector<Node> coords; | ||
| 310 | |||
| 311 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 312 | switch (texture_type) { | ||
| 313 | case TextureType::Texture1D: | ||
| 314 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 315 | break; | ||
| 316 | case TextureType::Texture2D: | ||
| 317 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 318 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 319 | break; | ||
| 320 | default: | ||
| 321 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 322 | |||
| 323 | // Fallback to interpreting as a 2D texture for now | ||
| 324 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 325 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 326 | texture_type = TextureType::Texture2D; | ||
| 327 | } | ||
| 328 | |||
| 329 | for (u32 element = 0; element < 2; ++element) { | ||
| 330 | auto params = coords; | ||
| 331 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; | ||
| 332 | const Node value = | ||
| 333 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 334 | SetTemporal(bb, element, value); | ||
| 335 | } | ||
| 336 | for (u32 element = 0; element < 2; ++element) { | ||
| 337 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 338 | } | ||
| 339 | |||
| 340 | break; | ||
| 341 | } | ||
| 342 | case OpCode::Id::TLDS: { | ||
| 343 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 344 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 345 | |||
| 346 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 347 | "AOFFI is not implemented"); | ||
| 348 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 349 | |||
| 350 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 351 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 352 | } | ||
| 353 | |||
| 354 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 355 | break; | ||
| 356 | } | ||
| 357 | default: | ||
| 358 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 359 | } | ||
| 360 | |||
| 361 | return pc; | ||
| 362 | } | ||
| 363 | |||
| 364 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 365 | bool is_array, bool is_shadow) { | ||
| 366 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 367 | |||
| 368 | // If this sampler has already been used, return the existing mapping. | ||
| 369 | const auto itr = | ||
| 370 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 371 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 372 | if (itr != used_samplers.end()) { | ||
| 373 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 374 | itr->IsShadow() == is_shadow); | ||
| 375 | return *itr; | ||
| 376 | } | ||
| 377 | |||
| 378 | // Otherwise create a new mapping for this sampler | ||
| 379 | const std::size_t next_index = used_samplers.size(); | ||
| 380 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 381 | return *used_samplers.emplace(entry).first; | ||
| 382 | } | ||
| 383 | |||
| 384 | void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 385 | const Node4& components) { | ||
| 386 | u32 dest_elem = 0; | ||
| 387 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 388 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 389 | // Skip disabled components | ||
| 390 | continue; | ||
| 391 | } | ||
| 392 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 393 | } | ||
| 394 | // After writing values in temporals, move them to the real registers | ||
| 395 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 396 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | |||
| 400 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 401 | const Node4& components) { | ||
| 402 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 403 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 404 | |||
| 405 | u32 dest_elem = 0; | ||
| 406 | for (u32 component = 0; component < 4; ++component) { | ||
| 407 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 408 | continue; | ||
| 409 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 410 | } | ||
| 411 | |||
| 412 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 413 | if (i < 2) { | ||
| 414 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 415 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 416 | } else { | ||
| 417 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 418 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 419 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 420 | } | ||
| 421 | } | ||
| 422 | } | ||
| 423 | |||
| 424 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, | ||
| 425 | const Node4& components) { | ||
| 426 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 427 | // float instruction). | ||
| 428 | |||
| 429 | Node4 values; | ||
| 430 | u32 dest_elem = 0; | ||
| 431 | for (u32 component = 0; component < 4; ++component) { | ||
| 432 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 433 | continue; | ||
| 434 | values[dest_elem++] = components[component]; | ||
| 435 | } | ||
| 436 | if (dest_elem == 0) | ||
| 437 | return; | ||
| 438 | |||
| 439 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 440 | |||
| 441 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 442 | if (dest_elem <= 2) { | ||
| 443 | SetRegister(bb, instr.gpr0, first_value); | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | SetTemporal(bb, 0, first_value); | ||
| 448 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 449 | |||
| 450 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 451 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 452 | } | ||
| 453 | |||
// Shared back end for TEX and TEXS: emits one sampling operation per output
// element. 'coords' already contains the coordinate registers (plus array
// index / depth reference); only the lod or bias operand is appended here.
// array_offset/bias_offset index where those operands live relative to the
// coordinate list and gpr20 respectively.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array && depth_compare),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    // LZ/LL/LLA all sample from an explicit level of detail.
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));

    const OperationCode read_method =
        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    std::optional<u32> array_offset_value;
    if (is_array)
        array_offset_value = static_cast<u32>(array_offset);

    // Captured BEFORE the lod/bias operand is appended below, so the metadata
    // counts only the real coordinates.
    const auto coords_count = static_cast<u32>(coords.size());

    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
            // LZ samples level zero; encode the lod as a literal 0.0.
            coords.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        // Each element gets its own copy of the operand list.
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset_value};
        values[element] = Operation(read_method, std::move(meta), std::move(params));
    }

    return values;
}
| 505 | |||
// Assembles the operand list for a TEX instruction and defers to
// GetTextureCode to emit the sampling operations.
// Operand layout: [coords..., (1D.DC filler), (array index), (depth ref), padding].
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // LZ has an implicit lod of zero, so only the remaining non-None modes
    // consume an extra lod/bias input register.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    // TEX supports up to 4 coordinates and 5 total input registers.
    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D.DC in opengl the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }
    // Index of the array element inside 'coords', forwarded to GetTextureCode.
    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    // TEX stores lod/bias directly in gpr20, so the bias offset is always 0.
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          0, std::move(coords));
}
| 545 | |||
// Assembles the operand list for a TEXS instruction (the "short" encoding of
// TEX) and defers to GetTextureCode to emit the sampling operations.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // LZ has an implicit lod of zero, so only the remaining non-None modes
    // consume an extra lod/bias input register.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    // TEXS supports up to 4 coordinates and 4 total input registers.
    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The final coordinate may spill into gpr20 depending on how many inputs
    // the other operands (array index, lod/bias, depth reference) consume.
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    // Index of the array element inside 'coords', forwarded to GetTextureCode.
    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    // With more than two coordinates the lod/bias lives one register past gpr20.
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          (coord_count > 2 ? 1 : 0), std::move(coords));
}
| 587 | |||
| 588 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 589 | bool is_array) { | ||
| 590 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 591 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 592 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 593 | |||
| 594 | // If enabled arrays index is always stored in the gpr8 field | ||
| 595 | const u64 array_register = instr.gpr8.Value(); | ||
| 596 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 597 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 598 | |||
| 599 | std::vector<Node> coords; | ||
| 600 | |||
| 601 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 602 | coords.push_back(GetRegister(coord_register + i)); | ||
| 603 | } | ||
| 604 | std::optional<u32> array_offset; | ||
| 605 | if (is_array) { | ||
| 606 | array_offset = static_cast<u32>(coords.size()); | ||
| 607 | coords.push_back(GetRegister(array_register)); | ||
| 608 | } | ||
| 609 | |||
| 610 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 611 | |||
| 612 | Node4 values; | ||
| 613 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 614 | auto params = coords; | ||
| 615 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | ||
| 616 | values[element] = | ||
| 617 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 618 | } | ||
| 619 | |||
| 620 | return values; | ||
| 621 | } | ||
| 622 | |||
| 623 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 624 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 625 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | ||
| 626 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 627 | |||
| 628 | // If enabled arrays index is always stored in the gpr8 field | ||
| 629 | const u64 array_register = instr.gpr8.Value(); | ||
| 630 | // if is array gpr20 is used | ||
| 631 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 632 | |||
| 633 | const u64 last_coord_register = | ||
| 634 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 635 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 636 | : coord_register + 1; | ||
| 637 | |||
| 638 | std::vector<Node> coords; | ||
| 639 | |||
| 640 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 641 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 642 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 643 | } | ||
| 644 | std::optional<u32> array_offset; | ||
| 645 | if (is_array) { | ||
| 646 | array_offset = static_cast<u32>(coords.size()); | ||
| 647 | coords.push_back(GetRegister(array_register)); | ||
| 648 | } | ||
| 649 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 650 | |||
| 651 | if (lod_enabled) { | ||
| 652 | // When lod is used always is in grp20 | ||
| 653 | coords.push_back(GetRegister(instr.gpr20)); | ||
| 654 | } else { | ||
| 655 | coords.push_back(Immediate(0)); | ||
| 656 | } | ||
| 657 | |||
| 658 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 659 | |||
| 660 | Node4 values; | ||
| 661 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 662 | auto params = coords; | ||
| 663 | MetaTexture meta{sampler, element, coords_count, array_offset}; | ||
| 664 | values[element] = | ||
| 665 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 666 | } | ||
| 667 | return values; | ||
| 668 | } | ||
| 669 | |||
| 670 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 671 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 672 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 673 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 674 | |||
| 675 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 676 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 677 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 678 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 679 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 680 | } | ||
| 681 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 682 | total_coord_count += | ||
| 683 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 684 | |||
| 685 | return {coord_count, total_coord_count}; | ||
| 686 | } | ||
| 687 | |||
| 688 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp new file mode 100644 index 000000000..c1e5f4efb --- /dev/null +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::ConditionCode; | ||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Register; | ||
| 16 | |||
// Decodes the miscellaneous ("other") instruction group: control flow (EXIT,
// BRA, SSY, PBK, SYNC, BRK), fragment discard (KIL), system-register moves
// (MOV_SYS), attribute interpolation (IPA), geometry stream output (OUT_R),
// and the partially-implemented ISBERD/DEPBAR.
// Returns the program counter after this instruction; an unconditional EXIT
// fast-forwards it to end decoding.
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::EXIT: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
                             static_cast<u32>(cc));

        switch (instr.flow.cond) {
        case Tegra::Shader::FlowCondition::Always:
            bb.push_back(Operation(OperationCode::Exit));
            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
            break;

        case Tegra::Shader::FlowCondition::Fcsm_Tr:
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
            break;

        default:
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
                              static_cast<u32>(instr.flow.cond.Value()));
        }
        break;
    }
    case OpCode::Id::KIL: {
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
                             static_cast<u32>(cc));

        // Discard the current fragment.
        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
        // Read a system variable into gpr0.
        switch (instr.sys20) {
        case Tegra::Shader::SystemVariable::InvocationInfo: {
            // Stubbed: the real invocation info is not tracked yet.
            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
            SetRegister(bb, instr.gpr0, Immediate(0u));
            break;
        }
        case Tegra::Shader::SystemVariable::Ydirection: {
            // Config pack's third value is Y_NEGATE's state.
            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
        }
        break;
    }
    case OpCode::Id::BRA: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "BRA with constant buffers are not implemented");

        // Relative branch, optionally guarded by a condition code.
        const u32 target = pc + instr.bra.GetBranchTarget();
        const Node branch = Operation(OperationCode::Branch, Immediate(target));

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        if (cc != Tegra::Shader::ConditionCode::T) {
            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
        } else {
            bb.push_back(branch);
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
        // structure to the BRA opcode.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
        // it's very unlikely a driver will emit such invalid shader.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::SYNC: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                             static_cast<u32>(cc));

        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::BRK: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                             static_cast<u32>(cc));

        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::IPA: {
        // Interpolate an input attribute into gpr0, optionally saturating it.
        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        // Stub: pass the source register through untouched.
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::DEPBAR: {
        // Dependency barrier: no-op stub, ordering is not modeled.
        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 177 | |||
| 178 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp new file mode 100644 index 000000000..1717f0653 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::PSETP: { | ||
| 22 | const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 24 | |||
| 25 | // We can't use the constant predicate as destination. | ||
| 26 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 27 | |||
| 28 | const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 29 | |||
| 30 | const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 31 | const Node predicate = Operation(combiner, op_a, op_b); | ||
| 32 | |||
| 33 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 34 | SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); | ||
| 35 | |||
| 36 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 37 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if | ||
| 38 | // enabled | ||
| 39 | SetPredicate(bb, instr.psetp.pred0, | ||
| 40 | Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), | ||
| 41 | second_pred)); | ||
| 42 | } | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | case OpCode::Id::CSETP: { | ||
| 46 | const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 47 | const Node condition_code = GetConditionCode(instr.csetp.cc); | ||
| 48 | |||
| 49 | const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 50 | |||
| 51 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 52 | SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); | ||
| 53 | } | ||
| 54 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 55 | const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); | ||
| 56 | SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); | ||
| 57 | } | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | default: | ||
| 61 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 62 | } | ||
| 63 | |||
| 64 | return pc; | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp new file mode 100644 index 000000000..8bd15fb00 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_register.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp new file mode 100644 index 000000000..bdb4424a6 --- /dev/null +++ b/src/video_core/shader/decode/register_set_predicate.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 20 | |||
| 21 | const Node apply_mask = [&]() { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::R2P_IMM: | ||
| 24 | return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); | ||
| 25 | default: | ||
| 26 | UNREACHABLE(); | ||
| 27 | return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); | ||
| 28 | } | ||
| 29 | }(); | ||
| 30 | const Node mask = GetRegister(instr.gpr8); | ||
| 31 | const auto offset = static_cast<u32>(instr.r2p.byte) * 8; | ||
| 32 | |||
| 33 | constexpr u32 programmable_preds = 7; | ||
| 34 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | ||
| 35 | const auto shift = static_cast<u32>(pred); | ||
| 36 | |||
| 37 | const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); | ||
| 38 | const Node condition = | ||
| 39 | Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); | ||
| 40 | |||
| 41 | const Node value_compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 42 | const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); | ||
| 43 | |||
| 44 | const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); | ||
| 45 | bb.push_back(Conditional(condition, {code})); | ||
| 46 | } | ||
| 47 | |||
| 48 | return pc; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp new file mode 100644 index 000000000..85026bb37 --- /dev/null +++ b/src/video_core/shader/decode/shift.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetRegister(instr.gpr8); | ||
| 20 | const Node op_b = [&]() { | ||
| 21 | if (instr.is_b_imm) { | ||
| 22 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 23 | } else if (instr.is_b_gpr) { | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | } else { | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 27 | } | ||
| 28 | }(); | ||
| 29 | |||
| 30 | switch (opcode->get().GetId()) { | ||
| 31 | case OpCode::Id::SHR_C: | ||
| 32 | case OpCode::Id::SHR_R: | ||
| 33 | case OpCode::Id::SHR_IMM: { | ||
| 34 | const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, | ||
| 35 | instr.shift.is_signed, PRECISE, op_a, op_b); | ||
| 36 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 37 | SetRegister(bb, instr.gpr0, value); | ||
| 38 | break; | ||
| 39 | } | ||
| 40 | case OpCode::Id::SHL_C: | ||
| 41 | case OpCode::Id::SHL_R: | ||
| 42 | case OpCode::Id::SHL_IMM: { | ||
| 43 | const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); | ||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | default: | ||
| 49 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 50 | } | ||
| 51 | |||
| 52 | return pc; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp new file mode 100644 index 000000000..c3432356d --- /dev/null +++ b/src/video_core/shader/decode/video.cpp | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | using Tegra::Shader::VideoType; | ||
| 16 | using Tegra::Shader::VmadShr; | ||
| 17 | |||
// Decodes the video (VMAD/VSETP) instruction family, which operates on
// byte- or short-selected slices of 32-bit registers.
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Operand A always comes from gpr8, optionally byte/short-selected.
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
    // Operand B is either a register (also byte/short-selected) or a 16-bit
    // immediate, sign-extended when signed_b is set.
    const Node op_b = [&]() {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (instr.video.signed_b) {
            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(imm));
        } else {
            return Immediate(instr.alu.GetImm20_16());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // VMAD: a * b + c. The result is treated as signed if either input
        // operand is signed; an optional arithmetic shift right (7 or 15)
        // is applied to the accumulated result.
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);

        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // VSETP: compare the two video operands and combine the comparison
        // with pred39, writing one or two destination predicates.
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));

        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 84 | |||
| 85 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, | ||
| 86 | Tegra::Shader::VideoType type, u64 byte_height) { | ||
| 87 | if (!is_chunk) { | ||
| 88 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 89 | } | ||
| 90 | const Node zero = Immediate(0); | ||
| 91 | |||
| 92 | switch (type) { | ||
| 93 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 94 | return BitfieldExtract(op, 0, 16); | ||
| 95 | case Tegra::Shader::VideoType::Size16_High: | ||
| 96 | return BitfieldExtract(op, 16, 16); | ||
| 97 | case Tegra::Shader::VideoType::Size32: | ||
| 98 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 99 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 100 | UNIMPLEMENTED(); | ||
| 101 | return zero; | ||
| 102 | case Tegra::Shader::VideoType::Invalid: | ||
| 103 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 104 | return zero; | ||
| 105 | default: | ||
| 106 | UNREACHABLE(); | ||
| 107 | return zero; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp new file mode 100644 index 000000000..0cd9cd1cc --- /dev/null +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
// Decodes the XMAD family: 16-bit multiply with 32-bit accumulate,
// sum = (a16 * b16 [<< 16]) + c, with an optional merge of the result's low
// half with operand B's low half shifted into the high half.
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Operand sources and the merge flag depend on the encoding variant
    // (const buffer / register / immediate operand B).
    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false, GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        }
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        return {false, Immediate(0), Immediate(0)};
    }();

    // Only 16 bits of each multiplicand are used, selected by high_a/high_b.
    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

    // Keep the unextracted operand B around: CBcc and merge modes use it.
    const Node original_b = op_b;
    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
    if (instr.xmad.product_shift_left) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }

    // The addend may be used whole (None), as one of its halves (CLo/CHi),
    // or adjusted by original_b shifted into the high half (CBcc).
    const Node original_c = op_c;
    op_c = [&]() {
        switch (instr.xmad.mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
            return BitfieldExtract(original_c, 0, 16);
        case Tegra::Shader::XmadMode::CHi:
            return BitfieldExtract(original_c, 16, 16);
        case Tegra::Shader::XmadMode::CBcc: {
            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                                   NO_PRECISE, original_b, Immediate(16));
            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
                                   shifted_b);
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
            return Immediate(0);
        }
    }();

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
    if (is_merge) {
        // Merge mode: sum = (sum & 0xffff) | (original_b << 16).
        const Node a = BitfieldExtract(sum, 0, 16);
        const Node b =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, sum);

    return pc;
}
| 96 | |||
| 97 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp new file mode 100644 index 000000000..d7747103e --- /dev/null +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -0,0 +1,444 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/logging/log.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::Attribute; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::IpaMode; | ||
| 19 | using Tegra::Shader::Pred; | ||
| 20 | using Tegra::Shader::PredCondition; | ||
| 21 | using Tegra::Shader::PredOperation; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | |||
| 24 | Node ShaderIR::StoreNode(NodeData&& node_data) { | ||
| 25 | auto store = std::make_unique<NodeData>(node_data); | ||
| 26 | const Node node = store.get(); | ||
| 27 | stored_nodes.push_back(std::move(store)); | ||
| 28 | return node; | ||
| 29 | } | ||
| 30 | |||
// Wraps a vector of code nodes in a conditional node; the contained code is
// only meant to execute when `condition` holds at shader runtime.
Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
    return StoreNode(ConditionalNode(condition, std::move(code)));
}

// Creates a comment node, carried through the IR for decompiler debugging.
Node ShaderIR::Comment(const std::string& text) {
    return StoreNode(CommentNode(text));
}

// Creates a 32-bit immediate value node.
Node ShaderIR::Immediate(u32 value) {
    return StoreNode(ImmediateNode(value));
}

// Creates a node referencing a general purpose register and tracks its use.
// RZ (the zero register) is not tracked since it needs no declaration.
Node ShaderIR::GetRegister(Register reg) {
    if (reg != Register::ZeroIndex) {
        used_registers.insert(static_cast<u32>(reg));
    }
    return StoreNode(GprNode(reg));
}

// Builds an immediate node from the instruction's 19-bit ALU immediate.
Node ShaderIR::GetImmediate19(Instruction instr) {
    return Immediate(instr.alu.GetImm20_19());
}

// Builds an immediate node from the instruction's 32-bit ALU immediate.
Node ShaderIR::GetImmediate32(Instruction instr) {
    return Immediate(instr.alu.GetImm20_32());
}
| 57 | |||
| 58 | Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { | ||
| 59 | const auto index = static_cast<u32>(index_); | ||
| 60 | const auto offset = static_cast<u32>(offset_); | ||
| 61 | |||
| 62 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | ||
| 63 | entry->second.MarkAsUsed(offset); | ||
| 64 | |||
| 65 | return StoreNode(CbufNode(index, Immediate(offset))); | ||
| 66 | } | ||
| 67 | |||
| 68 | Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | ||
| 69 | const auto index = static_cast<u32>(index_); | ||
| 70 | const auto offset = static_cast<u32>(offset_); | ||
| 71 | |||
| 72 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | ||
| 73 | entry->second.MarkAsUsedIndirect(); | ||
| 74 | |||
| 75 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | ||
| 76 | return StoreNode(CbufNode(index, final_offset)); | ||
| 77 | } | ||
| 78 | |||
// Returns a node for predicate register `pred_`, optionally negated.
// Usage is tracked except for UnusedIndex and NeverExecute, which are
// constant predicates and presumably need no declaration — confirm against
// the decompiler backends.
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const auto pred = static_cast<Pred>(pred_);
    if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
        used_predicates.insert(pred);
    }

    return StoreNode(PredicateNode(pred, negated));
}

// Returns a constant predicate node: UnusedIndex when true, NeverExecute
// when false.
Node ShaderIR::GetPredicate(bool immediate) {
    return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
}
| 91 | |||
// Returns a node reading input attribute `index`/`element` and records the
// (attribute, interpolation-mode) pair so the decompiler can declare it with
// every mode it is read with. `buffer` is forwarded to the attribute node —
// presumably a geometry-stage vertex index; confirm with callers.
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
    const auto [entry, is_new] =
        used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
    entry->second.insert(input_mode);

    return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
}
| 100 | |||
| 101 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 102 | if (index == Attribute::Index::ClipDistances0123 || | ||
| 103 | index == Attribute::Index::ClipDistances4567) { | ||
| 104 | const auto clip_index = | ||
| 105 | static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element); | ||
| 106 | used_clip_distances.at(clip_index) = true; | ||
| 107 | } | ||
| 108 | used_output_attributes.insert(index); | ||
| 109 | |||
| 110 | return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); | ||
| 111 | } | ||
| 112 | |||
| 113 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | ||
| 114 | const Node node = StoreNode(InternalFlagNode(flag)); | ||
| 115 | if (negated) { | ||
| 116 | return Operation(OperationCode::LogicalNegate, node); | ||
| 117 | } | ||
| 118 | return node; | ||
| 119 | } | ||
| 120 | |||
// Returns a node addressing local memory at the given address node.
Node ShaderIR::GetLocalMemory(Node address) {
    return StoreNode(LmemNode(address));
}

// Returns a temporary value register, mapped to the registers immediately
// after RZ (ZeroIndex + 1 + id). NOTE(review): assumes these indices never
// collide with registers real shader code uses — confirm with callers.
Node ShaderIR::GetTemporal(u32 id) {
    return GetRegister(Register::ZeroIndex + 1 + id);
}
| 128 | |||
| 129 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | ||
| 130 | if (absolute) { | ||
| 131 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | ||
| 132 | } | ||
| 133 | if (negate) { | ||
| 134 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | ||
| 135 | } | ||
| 136 | return value; | ||
| 137 | } | ||
| 138 | |||
| 139 | Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | ||
| 140 | if (!saturate) { | ||
| 141 | return value; | ||
| 142 | } | ||
| 143 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 144 | const Node positive_one = Immediate(1.0f); | ||
| 145 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | ||
| 146 | } | ||
| 147 | |||
| 148 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | ||
| 149 | switch (size) { | ||
| 150 | case Register::Size::Byte: | ||
| 151 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | ||
| 152 | Immediate(24)); | ||
| 153 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | ||
| 154 | Immediate(24)); | ||
| 155 | return value; | ||
| 156 | case Register::Size::Short: | ||
| 157 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | ||
| 158 | Immediate(16)); | ||
| 159 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | ||
| 160 | Immediate(16)); | ||
| 161 | case Register::Size::Word: | ||
| 162 | // Default - do nothing | ||
| 163 | return value; | ||
| 164 | default: | ||
| 165 | UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); | ||
| 166 | return value; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { | ||
| 171 | if (!is_signed) { | ||
| 172 | // Absolute or negate on an unsigned is pointless | ||
| 173 | return value; | ||
| 174 | } | ||
| 175 | if (absolute) { | ||
| 176 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | ||
| 177 | } | ||
| 178 | if (negate) { | ||
| 179 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | ||
| 180 | } | ||
| 181 | return value; | ||
| 182 | } | ||
| 183 | |||
// Builds a node holding the two packed half-float immediates encoded in the
// instruction. When the encoding carries negation bits, an HNegate is
// applied with one enable predicate per component.
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    const Node value = Immediate(instr.half_imm.PackImmediates());
    if (!has_negation) {
        return value;
    }
    const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
    const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);

    return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate);
}
| 194 | |||
// Combines a half-precision result with the destination register per the
// instruction's merge mode: H0_H1 replaces the whole register, F32 merges
// the pair into a single f32, and Mrg_H0/Mrg_H1 replace only one half of
// dest (per the enum names; exact packing lives in the HMerge* ops).
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    switch (merge) {
    case Tegra::Shader::HalfMerge::H0_H1:
        return src;
    case Tegra::Shader::HalfMerge::F32:
        return Operation(OperationCode::HMergeF32, src);
    case Tegra::Shader::HalfMerge::Mrg_H0:
        return Operation(OperationCode::HMergeH0, dest, src);
    case Tegra::Shader::HalfMerge::Mrg_H1:
        return Operation(OperationCode::HMergeH1, dest, src);
    }
    UNREACHABLE();
    return src;
}
| 209 | |||
// Applies the absolute-value and/or negation modifiers to a packed half2
// operand. HNegate takes one enable predicate per component, so both are
// passed as constant-true to negate the whole pair.
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    if (absolute) {
        value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value);
    }
    if (negate) {
        value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true),
                          GetPredicate(true));
    }
    return value;
}
| 220 | |||
// Builds a predicate node comparing op_a and op_b as floats. The *WithNan
// conditions map onto the same base comparison and are then OR-ed with
// per-operand NaN checks, since the base comparisons are ordered and would
// yield false when either operand is NaN.
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    // Hardware condition -> IR comparison operation.
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
        {PredCondition::Equal, OperationCode::LogicalFEqual},
        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);

    // NaN-tolerant conditions: also true when either operand is NaN.
    if (condition == PredCondition::LessThanWithNan ||
        condition == PredCondition::NotEqualWithNan ||
        condition == PredCondition::LessEqualWithNan ||
        condition == PredCondition::GreaterThanWithNan ||
        condition == PredCondition::GreaterEqualWithNan) {

        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_a));
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_b));
    }

    return predicate;
}
| 255 | |||
// Builds a boolean node comparing two integer operands, honoring signedness.
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                             Node op_b) {
    // Signed opcodes are stored here; SignedOperation maps them to the
    // unsigned counterparts when is_signed is false.
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalILessThan},
        {PredCondition::Equal, OperationCode::LogicalIEqual},
        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);

    // The *WithNan variants only alias the plain opcodes above; their special
    // semantics are not implemented for integers, so flag them.
    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "NaN comparisons for integers are not implemented");
    return predicate;
}
| 285 | |||
// Builds a two-component boolean node comparing two packed half-float
// operands under a hardware predicate condition.
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {

    // Unordered (NaN-aware) semantics are not implemented for halves; the
    // *WithNan conditions fall through to the ordered opcodes below.
    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "Unimplemented NaN comparison for half floats");

    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
        {PredCondition::Equal, OperationCode::Logical2HEqual},
        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    // The half metadata (meta) carries the per-operand component selection.
    const Node predicate = Operation(comparison->second, meta, op_a, op_b);

    return predicate;
}
| 317 | |||
| 318 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | ||
| 319 | static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { | ||
| 320 | {PredOperation::And, OperationCode::LogicalAnd}, | ||
| 321 | {PredOperation::Or, OperationCode::LogicalOr}, | ||
| 322 | {PredOperation::Xor, OperationCode::LogicalXor}, | ||
| 323 | }; | ||
| 324 | |||
| 325 | const auto op = PredicateOperationTable.find(operation); | ||
| 326 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); | ||
| 327 | return op->second; | ||
| 328 | } | ||
| 329 | |||
// Returns a boolean node representing the given hardware condition code.
Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
    switch (cc) {
    case Tegra::Shader::ConditionCode::NEU:
        // NEU is approximated via the Zero internal flag; the second argument
        // presumably negates the flag — confirm against GetInternalFlag.
        return GetInternalFlag(InternalFlag::Zero, true);
    default:
        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
        // Fall back to a predicate that never passes for unhandled codes.
        return GetPredicate(static_cast<u64>(Pred::NeverExecute));
    }
}
| 339 | |||
| 340 | void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) { | ||
| 341 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | ||
| 342 | } | ||
| 343 | |||
| 344 | void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) { | ||
| 345 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | ||
| 346 | } | ||
| 347 | |||
| 348 | void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) { | ||
| 349 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | ||
| 350 | } | ||
| 351 | |||
| 352 | void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { | ||
| 353 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | ||
| 354 | } | ||
| 355 | |||
// Stores value into temporary slot id. Temporaries are aliased to the
// registers immediately following the zero register (ZeroIndex + 1 + id).
void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
    SetRegister(bb, Register::ZeroIndex + 1 + id, value);
}
| 359 | |||
| 360 | void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { | ||
| 361 | if (!sets_cc) { | ||
| 362 | return; | ||
| 363 | } | ||
| 364 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | ||
| 365 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | ||
| 366 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 367 | } | ||
| 368 | |||
| 369 | void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { | ||
| 370 | if (!sets_cc) { | ||
| 371 | return; | ||
| 372 | } | ||
| 373 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | ||
| 374 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | ||
| 375 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 376 | } | ||
| 377 | |||
| 378 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | ||
| 379 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | ||
| 380 | Immediate(bits)); | ||
| 381 | } | ||
| 382 | |||
// Maps a signed integer opcode to its unsigned counterpart when is_signed is
// false; signed requests are returned unchanged.
/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
                                                        bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::INegate:
        // NOTE(review): no break/return here — in builds where UNREACHABLE_MSG
        // does not abort, this falls through to the IAbsolute case and then
        // out of the switch to the final UNREACHABLE_MSG below.
        UNREACHABLE_MSG("Can't negate an unsigned integer");
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
    }
    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
    return {};
}
| 443 | |||
| 444 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h new file mode 100644 index 000000000..96e7df6b6 --- /dev/null +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -0,0 +1,793 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
#include <array>
#include <cstddef>
#include <cstring>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <variant>
#include <vector>
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | ||
| 18 | #include "video_core/engines/shader_bytecode.h" | ||
| 19 | #include "video_core/engines/shader_header.h" | ||
| 20 | |||
| 21 | namespace VideoCommon::Shader { | ||
| 22 | |||
// Forward declarations of every payload type that can live inside NodeData.
class OperationNode;
class ConditionalNode;
class GprNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
class AbufNode;  ///< Attribute buffer
class CbufNode;  ///< Constant buffer
class LmemNode;  ///< Local memory
class GmemNode;  ///< Global memory
class CommentNode;

/// Raw Maxwell shader bytecode, one u64 word per entry.
using ProgramCode = std::vector<u64>;

/// Tagged union holding the concrete payload of an IR node.
using NodeData =
    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
                 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
/// Nodes are referenced as non-owning pointers to immutable node data.
using Node = const NodeData*;
using Node4 = std::array<Node, 4>;
/// A straight-line sequence of IR nodes.
using BasicBlock = std::vector<Node>;

/// Maximum program length, in u64 words.
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
| 45 | |||
/// Every operation the IR can express. The trailing comment on each entry
/// documents the expected operand signature and result type.
enum class OperationCode {
    Assign, /// (float& dest, float src) -> void

    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float

    FAdd,          /// (MetaArithmetic, float a, float b) -> float
    FMul,          /// (MetaArithmetic, float a, float b) -> float
    FDiv,          /// (MetaArithmetic, float a, float b) -> float
    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
    FSin,          /// (MetaArithmetic, float a) -> float
    FExp2,         /// (MetaArithmetic, float a) -> float
    FLog2,         /// (MetaArithmetic, float a) -> float
    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
    FSqrt,         /// (MetaArithmetic, float a) -> float
    FRoundEven,    /// (MetaArithmetic, float a) -> float
    FFloor,        /// (MetaArithmetic, float a) -> float
    FCeil,         /// (MetaArithmetic, float a) -> float
    FTrunc,        /// (MetaArithmetic, float a) -> float
    FCastInteger,  /// (MetaArithmetic, int a) -> float
    FCastUInteger, /// (MetaArithmetic, uint a) -> float

    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
    IMul,                  /// (MetaArithmetic, int a, int b) -> int
    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
    INegate,               /// (MetaArithmetic, int a) -> int
    IAbsolute,             /// (MetaArithmetic, int a) -> int
    IMin,                  /// (MetaArithmetic, int a, int b) -> int
    IMax,                  /// (MetaArithmetic, int a, int b) -> int
    ICastFloat,            /// (MetaArithmetic, float a) -> int
    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int offset) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int

    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            /// (MetaArithmetic, float a) -> uint
    UCastSigned,           /// (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,       /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int offset) -> uint
    UBitCount,             /// (MetaArithmetic, uint) -> uint

    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float
    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,    /// (float a, float b) -> f16vec2

    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
    LogicalOr,     /// (bool a, bool b) -> bool
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
    LogicalAll2,   /// (bool2 a) -> bool
    LogicalAny2,   /// (bool2 a) -> bool

    LogicalFLessThan,     /// (float a, float b) -> bool
    LogicalFEqual,        /// (float a, float b) -> bool
    LogicalFLessEqual,    /// (float a, float b) -> bool
    LogicalFGreaterThan,  /// (float a, float b) -> bool
    LogicalFNotEqual,     /// (float a, float b) -> bool
    LogicalFGreaterEqual, /// (float a, float b) -> bool
    LogicalFIsNan,        /// (float a) -> bool

    LogicalILessThan,     /// (int a, int b) -> bool
    LogicalIEqual,        /// (int a, int b) -> bool
    LogicalILessEqual,    /// (int a, int b) -> bool
    LogicalIGreaterThan,  /// (int a, int b) -> bool
    LogicalINotEqual,     /// (int a, int b) -> bool
    LogicalIGreaterEqual, /// (int a, int b) -> bool

    LogicalULessThan,     /// (uint a, uint b) -> bool
    LogicalUEqual,        /// (uint a, uint b) -> bool
    LogicalULessEqual,    /// (uint a, uint b) -> bool
    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
    LogicalUNotEqual,     /// (uint a, uint b) -> bool
    LogicalUGreaterEqual, /// (uint a, uint b) -> bool

    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2

    F4Texture,                /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureLod,             /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureGather,          /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    F4TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    F4TexelFetch,             /// (MetaTexture, int[N], int) -> float4

    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
    PopFlowStack,  /// () -> void
    Exit,          /// () -> void
    Discard,       /// () -> void

    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

    YNegate, /// () -> float

    Amount, /// Number of operation codes; not a real operation.
};
| 179 | |||
/// Maxwell condition-code flags modeled by the IR.
enum class InternalFlag {
    Zero = 0,
    Sign = 1,
    Carry = 2,
    Overflow = 3,
    Amount = 4, /// Number of flags; not a real flag.
};
| 187 | |||
/// Describes the behaviour of code path of a given entry point and a return point.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
    AlwaysReturn, ///< All code paths reach the return point.
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
    AlwaysEnd,    ///< All code paths reach an END instruction.
};
| 195 | |||
/// Describes one texture sampler used by the shader. Totally ordered so it
/// can be stored in a std::set (see ShaderIR::GetSamplers).
class Sampler {
public:
    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
                     bool is_array, bool is_shadow)
        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}

    std::size_t GetOffset() const {
        return offset;
    }

    std::size_t GetIndex() const {
        return index;
    }

    Tegra::Shader::TextureType GetType() const {
        return type;
    }

    bool IsArray() const {
        return is_array;
    }

    bool IsShadow() const {
        return is_shadow;
    }

    /// Lexicographic ordering over all fields, enabling std::set storage.
    bool operator<(const Sampler& rhs) const {
        return std::tie(offset, index, type, is_array, is_shadow) <
               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
    }

private:
    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
    /// instruction.
    std::size_t offset{};
    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
    Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
};
| 236 | |||
| 237 | class ConstBuffer { | ||
| 238 | public: | ||
| 239 | void MarkAsUsed(u64 offset) { | ||
| 240 | max_offset = std::max(max_offset, static_cast<u32>(offset)); | ||
| 241 | } | ||
| 242 | |||
| 243 | void MarkAsUsedIndirect() { | ||
| 244 | is_indirect = true; | ||
| 245 | } | ||
| 246 | |||
| 247 | bool IsIndirect() const { | ||
| 248 | return is_indirect; | ||
| 249 | } | ||
| 250 | |||
| 251 | u32 GetSize() const { | ||
| 252 | return max_offset + 1; | ||
| 253 | } | ||
| 254 | |||
| 255 | private: | ||
| 256 | u32 max_offset{}; | ||
| 257 | bool is_indirect{}; | ||
| 258 | }; | ||
| 259 | |||
/// Metadata attached to arithmetic operations. `precise` marks operations
/// built with the PRECISE constant below — presumably requesting that the
/// backend not apply precision-lowering optimizations; confirm in codegen.
struct MetaArithmetic {
    bool precise{};
};
| 263 | |||
/// Metadata attached to packed half-float operations.
struct MetaHalfArithmetic {
    bool precise{};
    // Per-operand component selection for up to three operands; defaults to
    // using both packed halves (H0_H1) of each operand.
    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1};
};
| 270 | |||
/// Metadata attached to texture operations.
struct MetaTexture {
    const Sampler& sampler; ///< Sampler to use; must outlive the node.
    u32 element{};          ///< Presumably the texel component selected — confirm at use sites.
    u32 coords_count{};     ///< Number of coordinate operands.
    std::optional<u32> array_index; ///< Array layer operand, when sampling an array texture.
};
| 277 | |||
/// Shared metadata instances so call sites don't repeat literals.
constexpr MetaArithmetic PRECISE = {true};
constexpr MetaArithmetic NO_PRECISE = {false};
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};

/// Any metadata variant an OperationNode can carry.
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
| 283 | |||
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
public:
    // NOTE(review): the parameter pack T is unused in this and the next
    // constructor; the templates appear to exist only for overload-resolution
    // purposes — confirm before simplifying.
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}

    template <typename... T>
    explicit constexpr OperationNode(OperationCode code, Meta&& meta)
        : code{code}, meta{std::move(meta)} {}

    /// Builds an operation with operands but default (empty) metadata.
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code, const T*... operands)
        : OperationNode(code, {}, operands...) {}

    /// Builds an operation from metadata and a variadic list of operand pointers.
    template <typename... T>
    explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
        : code{code}, meta{std::move(meta)} {

        auto operands_list = {operands_...};
        for (auto& operand : operands_list) {
            operands.push_back(operand);
        }
    }

    // NOTE(review): `meta` is taken by rvalue reference but copied in the
    // mem-initializer (meta{meta}); consider std::move if a copy is not intended.
    explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
        : code{code}, meta{meta}, operands{std::move(operands)} {}

    explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
        : code{code}, meta{}, operands{std::move(operands)} {}

    /// Opcode identifying what this operation does.
    OperationCode GetCode() const {
        return code;
    }

    /// Metadata variant set at construction (default-constructed when omitted).
    const Meta& GetMeta() const {
        return meta;
    }

    std::size_t GetOperandsCount() const {
        return operands.size();
    }

    /// Bounds-checked access to the operand at the given position.
    Node operator[](std::size_t operand_index) const {
        return operands.at(operand_index);
    }

private:
    const OperationCode code;
    const Meta meta;
    std::vector<Node> operands;
};
| 335 | |||
| 336 | /// Encloses inside any kind of node that returns a boolean conditionally-executed code | ||
| 337 | class ConditionalNode final { | ||
| 338 | public: | ||
| 339 | explicit ConditionalNode(Node condition, std::vector<Node>&& code) | ||
| 340 | : condition{condition}, code{std::move(code)} {} | ||
| 341 | |||
| 342 | Node GetCondition() const { | ||
| 343 | return condition; | ||
| 344 | } | ||
| 345 | |||
| 346 | const std::vector<Node>& GetCode() const { | ||
| 347 | return code; | ||
| 348 | } | ||
| 349 | |||
| 350 | private: | ||
| 351 | const Node condition; ///< Condition to be satisfied | ||
| 352 | std::vector<Node> code; ///< Code to execute | ||
| 353 | }; | ||
| 354 | |||
| 355 | /// A general purpose register | ||
| 356 | class GprNode final { | ||
| 357 | public: | ||
| 358 | explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} | ||
| 359 | |||
| 360 | u32 GetIndex() const { | ||
| 361 | return static_cast<u32>(index); | ||
| 362 | } | ||
| 363 | |||
| 364 | private: | ||
| 365 | const Tegra::Shader::Register index; | ||
| 366 | }; | ||
| 367 | |||
| 368 | /// A 32-bits value that represents an immediate value | ||
| 369 | class ImmediateNode final { | ||
| 370 | public: | ||
| 371 | explicit constexpr ImmediateNode(u32 value) : value{value} {} | ||
| 372 | |||
| 373 | u32 GetValue() const { | ||
| 374 | return value; | ||
| 375 | } | ||
| 376 | |||
| 377 | private: | ||
| 378 | const u32 value; | ||
| 379 | }; | ||
| 380 | |||
| 381 | /// One of Maxwell's internal flags | ||
| 382 | class InternalFlagNode final { | ||
| 383 | public: | ||
| 384 | explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} | ||
| 385 | |||
| 386 | InternalFlag GetFlag() const { | ||
| 387 | return flag; | ||
| 388 | } | ||
| 389 | |||
| 390 | private: | ||
| 391 | const InternalFlag flag; | ||
| 392 | }; | ||
| 393 | |||
| 394 | /// A predicate register, it can be negated without aditional nodes | ||
| 395 | class PredicateNode final { | ||
| 396 | public: | ||
| 397 | explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) | ||
| 398 | : index{index}, negated{negated} {} | ||
| 399 | |||
| 400 | Tegra::Shader::Pred GetIndex() const { | ||
| 401 | return index; | ||
| 402 | } | ||
| 403 | |||
| 404 | bool IsNegated() const { | ||
| 405 | return negated; | ||
| 406 | } | ||
| 407 | |||
| 408 | private: | ||
| 409 | const Tegra::Shader::Pred index; | ||
| 410 | const bool negated; | ||
| 411 | }; | ||
| 412 | |||
| 413 | /// Attribute buffer memory (known as attributes or varyings in GLSL terms) | ||
| 414 | class AbufNode final { | ||
| 415 | public: | ||
| 416 | explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, | ||
| 417 | const Tegra::Shader::IpaMode& input_mode, Node buffer = {}) | ||
| 418 | : input_mode{input_mode}, index{index}, element{element}, buffer{buffer} {} | ||
| 419 | |||
| 420 | explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, | ||
| 421 | Node buffer = {}) | ||
| 422 | : input_mode{}, index{index}, element{element}, buffer{buffer} {} | ||
| 423 | |||
| 424 | Tegra::Shader::IpaMode GetInputMode() const { | ||
| 425 | return input_mode; | ||
| 426 | } | ||
| 427 | |||
| 428 | Tegra::Shader::Attribute::Index GetIndex() const { | ||
| 429 | return index; | ||
| 430 | } | ||
| 431 | |||
| 432 | u32 GetElement() const { | ||
| 433 | return element; | ||
| 434 | } | ||
| 435 | |||
| 436 | Node GetBuffer() const { | ||
| 437 | return buffer; | ||
| 438 | } | ||
| 439 | |||
| 440 | private: | ||
| 441 | const Tegra::Shader::IpaMode input_mode; | ||
| 442 | const Node buffer; | ||
| 443 | const Tegra::Shader::Attribute::Index index; | ||
| 444 | const u32 element; | ||
| 445 | }; | ||
| 446 | |||
| 447 | /// Constant buffer node, usually mapped to uniform buffers in GLSL | ||
| 448 | class CbufNode final { | ||
| 449 | public: | ||
| 450 | explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {} | ||
| 451 | |||
| 452 | u32 GetIndex() const { | ||
| 453 | return index; | ||
| 454 | } | ||
| 455 | |||
| 456 | Node GetOffset() const { | ||
| 457 | return offset; | ||
| 458 | } | ||
| 459 | |||
| 460 | private: | ||
| 461 | const u32 index; | ||
| 462 | const Node offset; | ||
| 463 | }; | ||
| 464 | |||
| 465 | /// Local memory node | ||
| 466 | class LmemNode final { | ||
| 467 | public: | ||
| 468 | explicit constexpr LmemNode(Node address) : address{address} {} | ||
| 469 | |||
| 470 | Node GetAddress() const { | ||
| 471 | return address; | ||
| 472 | } | ||
| 473 | |||
| 474 | private: | ||
| 475 | const Node address; | ||
| 476 | }; | ||
| 477 | |||
| 478 | /// Global memory node | ||
| 479 | class GmemNode final { | ||
| 480 | public: | ||
| 481 | explicit constexpr GmemNode(Node address) : address{address} {} | ||
| 482 | |||
| 483 | Node GetAddress() const { | ||
| 484 | return address; | ||
| 485 | } | ||
| 486 | |||
| 487 | private: | ||
| 488 | const Node address; | ||
| 489 | }; | ||
| 490 | |||
| 491 | /// Commentary, can be dropped | ||
| 492 | class CommentNode final { | ||
| 493 | public: | ||
| 494 | explicit CommentNode(std::string text) : text{std::move(text)} {} | ||
| 495 | |||
| 496 | const std::string& GetText() const { | ||
| 497 | return text; | ||
| 498 | } | ||
| 499 | |||
| 500 | private: | ||
| 501 | std::string text; | ||
| 502 | }; | ||
| 503 | |||
/// Intermediate representation of a Maxwell shader program. The constructor
/// decodes the passed program code eagerly; the Get* accessors then expose the
/// decoded basic blocks and the resources (registers, predicates, attributes,
/// constant buffers, samplers, clip distances) the program was seen to use.
class ShaderIR final {
public:
    // NOTE(review): only a reference to program_code is stored — the caller
    // must keep the buffer alive for the lifetime of this ShaderIR.
    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
        : program_code{program_code}, main_offset{main_offset} {

        Decode();
    }

    /// Decoded basic blocks, keyed by their starting program counter
    const std::map<u32, BasicBlock>& GetBasicBlocks() const {
        return basic_blocks;
    }

    /// Set of general-purpose register indices used by the program
    const std::set<u32>& GetRegisters() const {
        return used_registers;
    }

    /// Set of predicates used by the program
    const std::set<Tegra::Shader::Pred>& GetPredicates() const {
        return used_predicates;
    }

    /// Input attributes used, with every IPA mode each attribute was read with
    const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
    GetInputAttributes() const {
        return used_input_attributes;
    }

    /// Output attributes written by the program
    const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
        return used_output_attributes;
    }

    /// Constant buffers accessed, keyed by buffer index
    const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
        return used_cbufs;
    }

    /// Texture samplers referenced by the program
    const std::set<Sampler>& GetSamplers() const {
        return used_samplers;
    }

    /// Per-slot flags telling which clip distances the program writes
    const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
        const {
        return used_clip_distances;
    }

    /// Size in bytes of the decoded program (instructions are 64 bits wide)
    std::size_t GetLength() const {
        return static_cast<std::size_t>(coverage_end * sizeof(u64));
    }

    // NOTE(review): header is never explicitly initialized here — presumably
    // Decode() (defined out of view) fills it in; confirm before relying on it.
    const Tegra::Shader::Header& GetHeader() const {
        return header;
    }

private:
    /// Decodes the whole program into basic_blocks, starting at main_offset
    void Decode();

    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);

    BasicBlock DecodeRange(u32 begin, u32 end);

    /**
     * Decodes a single instruction from Tegra to IR.
     * @param bb Basic block where the nodes will be written to.
     * @param pc Program counter. Offset to decode.
     * @return Next address to decode.
     */
    u32 DecodeInstr(BasicBlock& bb, u32 pc);

    // Per-instruction-family decoders; each appends IR nodes to bb and returns
    // the next program counter. One per shader/decode/*.cpp translation unit.
    u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc);

    /// Internalizes node's data and returns a managed pointer to a clone of that node
    Node StoreNode(NodeData&& node_data);

    /// Creates a conditional node
    Node Conditional(Node condition, std::vector<Node>&& code);
    /// Creates a commentary
    Node Comment(const std::string& text);
    /// Creates an u32 immediate
    Node Immediate(u32 value);
    /// Creates a s32 immediate (stored bit-identically as u32)
    Node Immediate(s32 value) {
        return Immediate(static_cast<u32>(value));
    }
    /// Creates a f32 immediate. memcpy performs the bit-cast to avoid
    /// strict-aliasing/type-punning issues.
    Node Immediate(f32 value) {
        u32 integral;
        std::memcpy(&integral, &value, sizeof(u32));
        return Immediate(integral);
    }

    /// Generates a node for a passed register.
    Node GetRegister(Tegra::Shader::Register reg);
    /// Generates a node representing a 19-bit immediate value
    Node GetImmediate19(Tegra::Shader::Instruction instr);
    /// Generates a node representing a 32-bit immediate value
    Node GetImmediate32(Tegra::Shader::Instruction instr);
    /// Generates a node representing a constant buffer
    Node GetConstBuffer(u64 index, u64 offset);
    /// Generates a node representing a constant buffer with a variadic offset
    Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
    /// Generates a node for a passed predicate. It can be optionally negated
    Node GetPredicate(u64 pred, bool negated = false);
    /// Generates a predicate node for an immediate true or false value
    Node GetPredicate(bool immediate);
    /// Generates a node representing an input attribute. Keeps track of used attributes.
    Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
                           const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
    /// Generates a node representing an output attribute. Keeps track of used attributes.
    Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
    /// Generates a node representing an internal flag
    Node GetInternalFlag(InternalFlag flag, bool negated = false);
    /// Generates a node representing a local memory address
    Node GetLocalMemory(Node address);
    /// Generates a temporary value; internally it uses a post-RZ register
    Node GetTemporal(u32 id);

    /// Sets a register. src value must be a number-evaluated node.
    void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src);
    /// Sets a predicate. src value must be a bool-evaluated node
    void SetPredicate(BasicBlock& bb, u64 dest, Node src);
    /// Sets an internal flag. src value must be a bool-evaluated node
    void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value);
    /// Sets a local memory address. address and value must be a number-evaluated node
    void SetLocalMemory(BasicBlock& bb, Node address, Node value);
    /// Sets a temporary. Internally it uses a post-RZ register
    void SetTemporal(BasicBlock& bb, u32 id, Node value);

    /// Sets internal flags from a float
    void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
    /// Sets internal flags from an integer
    void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);

    /// Conditionally absolute/negated float. Absolute is applied first
    Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
    /// Conditionally saturates a float
    Node GetSaturatedFloat(Node value, bool saturate = true);

    /// Converts an integer to different sizes.
    Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
    /// Conditionally absolute/negated integer. Absolute is applied first
    Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);

    /// Unpacks a half immediate from an instruction
    Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
    /// Merges a half pair into another value
    Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
    /// Conditionally absolute/negated half float pair. Absolute is applied first
    Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);

    /// Returns a predicate comparing two floats
    Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
    /// Returns a predicate comparing two integers
    Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
                                       Node op_a, Node op_b);
    /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                    const MetaHalfArithmetic& meta, Node op_a, Node op_b);

    /// Returns a predicate combiner operation
    OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);

    /// Returns a condition code evaluated from internal flags
    Node GetConditionCode(Tegra::Shader::ConditionCode cc);

    /// Accesses a texture sampler
    const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
                              Tegra::Shader::TextureType type, bool is_array, bool is_shadow);

    /// Extracts a sequence of bits from a node
    Node BitfieldExtract(Node value, u32 offset, u32 bits);

    void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
                                  const Node4& components);

    void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
                                   const Node4& components);
    void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
                                       const Node4& components);

    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                     bool is_array);

    Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                      bool is_array);

    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool depth_compare, bool is_array);

    Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool is_array);

    std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
        Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
        bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);

    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                         Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                         bool is_array, std::size_t array_offset, std::size_t bias_offset,
                         std::vector<Node>&& coords);

    Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                         u64 byte_height);

    void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
                             Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
                             Tegra::Shader::PredicateResultMode predicate_mode,
                             Tegra::Shader::Pred predicate, bool sets_cc);
    void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                              Node op_c, Node imm_lut, bool sets_cc);

    // Convenience factories that build an OperationNode and internalize it via
    // StoreNode. Overloads cover with/without Meta and variadic vs vector operands.
    template <typename... T>
    Node Operation(OperationCode code, const T*... operands) {
        return StoreNode(OperationNode(code, operands...));
    }

    template <typename... T>
    Node Operation(OperationCode code, Meta&& meta, const T*... operands) {
        return StoreNode(OperationNode(code, std::move(meta), operands...));
    }

    template <typename... T>
    Node Operation(OperationCode code, std::vector<Node>&& operands) {
        return StoreNode(OperationNode(code, std::move(operands)));
    }

    template <typename... T>
    Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
        return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
    }

    template <typename... T>
    Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) {
        return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...));
    }

    template <typename... T>
    Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) {
        return StoreNode(
            OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...));
    }

    static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);

    const ProgramCode& program_code;
    const u32 main_offset;

    // Instruction-offset range covered while decoding; coverage_end feeds GetLength()
    u32 coverage_begin{};
    u32 coverage_end{};
    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;

    std::map<u32, BasicBlock> basic_blocks;

    // Owns every NodeData created through StoreNode; Node handles point into this
    std::vector<std::unique_ptr<NodeData>> stored_nodes;

    // Resource-usage bookkeeping populated during decoding
    std::set<u32> used_registers;
    std::set<Tegra::Shader::Pred> used_predicates;
    std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
        used_input_attributes;
    std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
    std::map<u32, ConstBuffer> used_cbufs;
    std::set<Sampler> used_samplers;
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};

    Tegra::Shader::Header header;
};
| 792 | |||
| 793 | } // namespace VideoCommon::Shader | ||