Merge pull request #355 from Subv/shader_instr

ShaderGen: Fixed TEXS overriding its own texcoords and implemented fmul32i
author: bunnei 2018-04-19 21:09:02 -0400
committer: GitHub 2018-04-19 21:09:02 -0400
commit: f633b0c87557101ae30fa2657621e804c9d637c9 (patch)
tree: 4013d5a2feccf26c16060ae72c42c200d8da3482
parent: Merge pull request #348 from jlachniet/patch-1 (diff)
parent: ShaderGen: Implemented the fmul32i shader instruction. (diff)
download: yuzu-f633b0c87557101ae30fa2657621e804c9d637c9.tar.gz
yuzu-f633b0c87557101ae30fa2657621e804c9d637c9.tar.xz
yuzu-f633b0c87557101ae30fa2657621e804c9d637c9.zip
2 files changed, 39 insertions, 11 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ed66d893a..7cd125f05 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -90,6 +90,7 @@ union OpCode {
    enum class Id : u64 {
        TEXS = 0x6C,
        IPA = 0xE0,
+        FMUL32_IMM = 0x1E,
        FFMA_IMM = 0x65,
        FFMA_CR = 0x93,
        FFMA_RC = 0xA3,
@@ -142,6 +143,7 @@ union OpCode {
        switch (op2) {
        case Id::IPA:
+        case Id::FMUL32_IMM:
            return op2;
        }
@@ -235,6 +237,7 @@ union OpCode {
        info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
        info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
        info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
+        info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"};
        info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
        info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
        info_table[Id::EXIT] = {Type::Trivial, "exit"};
@@ -309,7 +312,8 @@ union Instruction {
    BitField<39, 8, Register> gpr39;
    union {
-        BitField<20, 19, u64> imm20;
+        BitField<20, 19, u64> imm20_19;
+        BitField<20, 32, u64> imm20_32;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<48, 1, u64> negate_a;
@@ -317,14 +321,21 @@ union Instruction {
        BitField<50, 1, u64> abs_d;
        BitField<56, 1, u64> negate_imm;
-        float GetImm20() const {
+        float GetImm20_19() const {
            float result{};
-            u32 imm{static_cast<u32>(imm20)};
+            u32 imm{static_cast<u32>(imm20_19)};
            imm <<= 12;
            imm |= negate_imm ? 0x80000000 : 0;
            std::memcpy(&result, &imm, sizeof(imm));
            return result;
        }
+        float GetImm20_32() const {
+            float result{};
+            u32 imm{static_cast<u32>(imm20_32)};
+            std::memcpy(&result, &imm, sizeof(imm));
+            return result;
+        }
    } alu;
    union {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6233ee358..4cc617c97 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -190,9 +190,14 @@ private:
        }
    }
-    /// Generates code representing an immediate value
+    /// Generates code representing a 19-bit immediate value
-    static std::string GetImmediate(const Instruction& instr) {
+    static std::string GetImmediate19(const Instruction& instr) {
-        return std::to_string(instr.alu.GetImm20());
+        return std::to_string(instr.alu.GetImm20_19());
+    }
+    /// Generates code representing a 32-bit immediate value
+    static std::string GetImmediate32(const Instruction& instr) {
+        return std::to_string(instr.alu.GetImm20_32());
    }
    /// Generates code representing a temporary (GPR) register.
@@ -276,7 +281,7 @@ private:
            std::string op_b = instr.alu.negate_b ? "-" : "";
            if (instr.is_b_imm) {
-                op_b += GetImmediate(instr);
+                op_b += GetImmediate19(instr);
            } else {
                if (instr.is_b_gpr) {
                    op_b += GetRegister(instr.gpr20);
@@ -296,6 +301,11 @@ private:
                SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
                break;
            }
+            case OpCode::Id::FMUL32_IMM: {
+                // fmul32i doesn't have abs or neg bits.
+                SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+                break;
+            }
            case OpCode::Id::FADD_C:
            case OpCode::Id::FADD_R:
            case OpCode::Id::FADD_IMM: {
@@ -364,7 +374,7 @@ private:
                break;
            }
            case OpCode::Id::FFMA_IMM: {
-                op_b += GetImmediate(instr);
+                op_b += GetImmediate19(instr);
                op_c += GetRegister(instr.gpr39);
                break;
            }
@@ -399,11 +409,18 @@ private:
                const std::string op_a = GetRegister(instr.gpr8);
                const std::string op_b = GetRegister(instr.gpr20);
                const std::string sampler = GetSampler(instr.sampler);
-                const std::string coord = "vec2(" + op_a + ", " + op_b + ")";
+                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
-                const std::string texture = "texture(" + sampler + ", " + coord + ")";
+                // Add an extra scope and declare the texture coords inside to prevent overwriting
+                // them in case they are used as outputs of the texs instruction.
+                shader.AddLine("{");
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "texture(" + sampler + ", coords)";
                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
                    SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
                }
+                --shader.scope;
+                shader.AddLine("}");
                break;
            }
            default: {
@@ -586,7 +603,7 @@ private:
    std::set<Attribute::Index> declr_input_attribute;
    std::set<Attribute::Index> declr_output_attribute;
    std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
-};
+};
 std::string GetCommonDeclarations() {
    return "bool exec_shader();";
author	bunnei	2018-04-19 21:09:02 -0400
committer	GitHub	2018-04-19 21:09:02 -0400
commit	f633b0c87557101ae30fa2657621e804c9d637c9 (patch)
tree	4013d5a2feccf26c16060ae72c42c200d8da3482
parent	Merge pull request #348 from jlachniet/patch-1 (diff)
parent	ShaderGen: Implemented the fmul32i shader instruction. (diff)
download	yuzu-f633b0c87557101ae30fa2657621e804c9d637c9.tar.gz yuzu-f633b0c87557101ae30fa2657621e804c9d637c9.tar.xz yuzu-f633b0c87557101ae30fa2657621e804c9d637c9.zip