2 files changed, 58 insertions, 7 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 58f2904ce..d6e2397f2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -67,6 +67,13 @@ private:
    u64 value{};
 };
+// Encoding of the `size` bitfield of the fmt20 attribute format. The LD_A/ST_A handling in the
+// OpenGL shader decompiler derives the number of words to transfer as (value + 1).
+// NOTE(review): the `size` field is 3 bits wide, so raw values 4-7 are representable but have
+// no enumerator here - confirm whether they can occur.
+enum class AttributeSize : u64 {
+    Word = 0,
+    DoubleWord = 1,
+    TripleWord = 2,
+    QuadWord = 3,
+};
 union Attribute {
    Attribute() = default;
@@ -87,9 +94,10 @@ union Attribute {
    };
    union {
+        BitField<20, 10, u64> immediate;
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
-        BitField<47, 3, u64> size;
+        BitField<47, 3, AttributeSize> size;
    } fmt20;
    union {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2d56370c7..81c0662d0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1772,13 +1772,34 @@ private:
        case OpCode::Type::Memory: {
            switch (opcode->GetId()) {
            case OpCode::Id::LD_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
+                // Loads (fmt20.size + 1) consecutive input attribute elements into consecutive
+                // registers starting at gpr0. Only direct accesses (gpr8 == Register::ZeroIndex)
+                // whose immediate offset is a multiple of sizeof(u32) are handled.
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute loads are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute loads are not supported");
                // Note: Shouldn't this be interp mode flat? As in no interpolation made.
                Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                                  Tegra::Shader::IpaSampleMode::Default};
-                regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
-                                                instr.attribute.fmt20.index, input_mode);
+                // `element` is a u64 bitfield; narrow it explicitly, matching the cast on `index`.
+                u32 next_element = static_cast<u32>(instr.attribute.fmt20.element.Value());
+                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
+                                                    static_cast<Attribute::Index>(next_index),
+                                                    input_mode);
+                    // Advance to the next element for the following register. Once the fourth
+                    // element of a vec4 has been loaded, continue with the first element of the
+                    // next attribute index.
+                    next_element = (next_element + 1) % 4;
+                    if (next_element == 0) {
+                        ++next_index;
+                    }
+                }
                break;
            }
            case OpCode::Id::LD_C: {
@@ -1820,9 +1841,31 @@ private:
                break;
            }
            case OpCode::Id::ST_A: {
-                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
-                regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
-                                                  instr.attribute.fmt20.element, instr.gpr0);
+                // Stores (fmt20.size + 1) consecutive registers starting at gpr0 into consecutive
+                // output attribute elements. Only direct accesses (gpr8 == Register::ZeroIndex)
+                // whose immediate offset is a multiple of sizeof(u32) are handled.
+                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+                           "Indirect attribute stores are not supported");
+                ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+                           "Unaligned attribute stores are not supported");
+                // `element` is a u64 bitfield; narrow it explicitly, matching the cast on `index`.
+                u32 next_element = static_cast<u32>(instr.attribute.fmt20.element.Value());
+                u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
+                const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+                for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+                    regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
+                                                      next_element,
+                                                      instr.gpr0.Value() + reg_offset);
+                    // Advance to the next element for the following register. Once the fourth
+                    // element of a vec4 has been stored, continue with the first element of the
+                    // next attribute index.
+                    next_element = (next_element + 1) % 4;
+                    if (next_element == 0) {
+                        ++next_index;
+                    }
+                }
                break;
            }
            case OpCode::Id::TEX: {

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 58f2904ce..d6e2397f2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h
@@ -67,6 +67,13 @@ private:
67	u64 value{};	67	u64 value{};
68	};	68	};
69		69
		70	enum class AttributeSize : u64 {
		71	Word = 0,
		72	DoubleWord = 1,
		73	TripleWord = 2,
		74	QuadWord = 3,
		75	};
		76
70	union Attribute {	77	union Attribute {
71	Attribute() = default;	78	Attribute() = default;
72		79
@@ -87,9 +94,10 @@ union Attribute {
87	};	94	};
88		95
89	union {	96	union {
		97	BitField<20, 10, u64> immediate;
90	BitField<22, 2, u64> element;	98	BitField<22, 2, u64> element;
91	BitField<24, 6, Index> index;	99	BitField<24, 6, Index> index;
92	BitField<47, 3, u64> size;	100	BitField<47, 3, AttributeSize> size;
93	} fmt20;	101	} fmt20;
94		102
95	union {	103	union {


diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2d56370c7..81c0662d0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1772,13 +1772,34 @@ private:
1772	case OpCode::Type::Memory: {	1772	case OpCode::Type::Memory: {
1773	switch (opcode->GetId()) {	1773	switch (opcode->GetId()) {
1774	case OpCode::Id::LD_A: {	1774	case OpCode::Id::LD_A: {
1775	ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
1776	// Note: Shouldn't this be interp mode flat? As in no interpolation made.	1775	// Note: Shouldn't this be interp mode flat? As in no interpolation made.
		1776	ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
		1777	"Indirect attribute loads are not supported");
		1778	ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
		1779	"Unaligned attribute loads are not supported");
1777		1780
1778	Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,	1781	Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
1779	Tegra::Shader::IpaSampleMode::Default};	1782	Tegra::Shader::IpaSampleMode::Default};
1780	regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,	1783
1781	instr.attribute.fmt20.index, input_mode);	1784	u32 next_element = instr.attribute.fmt20.element;
		1785	u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
		1786
		1787	const auto LoadNextElement = [&](u32 reg_offset) {
		1788	regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
		1789	static_cast<Attribute::Index>(next_index),
		1790	input_mode);
		1791
		1792	// Load the next attribute element into the following register. If the element
		1793	// to load goes beyond the vec4 size, load the first element of the next
		1794	// attribute.
		1795	next_element = (next_element + 1) % 4;
		1796	next_index = next_index + (next_element == 0 ? 1 : 0);
		1797	};
		1798
		1799	const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
		1800	for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
		1801	LoadNextElement(reg_offset);
		1802	}
1782	break;	1803	break;
1783	}	1804	}
1784	case OpCode::Id::LD_C: {	1805	case OpCode::Id::LD_C: {
@@ -1820,9 +1841,31 @@ private:
1820	break;	1841	break;
1821	}	1842	}
1822	case OpCode::Id::ST_A: {	1843	case OpCode::Id::ST_A: {
1823	ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");	1844	ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
1824	regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,	1845	"Indirect attribute loads are not supported");
1825	instr.attribute.fmt20.element, instr.gpr0);	1846	ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
		1847	"Unaligned attribute loads are not supported");
		1848
		1849	u32 next_element = instr.attribute.fmt20.element;
		1850	u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
		1851
		1852	const auto StoreNextElement = [&](u32 reg_offset) {
		1853	regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
		1854	next_element,
		1855	instr.gpr0.Value() + reg_offset);
		1856
		1857	// Load the next attribute element into the following register. If the element
		1858	// to load goes beyond the vec4 size, load the first element of the next
		1859	// attribute.
		1860	next_element = (next_element + 1) % 4;
		1861	next_index = next_index + (next_element == 0 ? 1 : 0);
		1862	};
		1863
		1864	const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
		1865	for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
		1866	StoreNextElement(reg_offset);
		1867	}
		1868
1826	break;	1869	break;
1827	}	1870	}
1828	case OpCode::Id::TEX: {	1871	case OpCode::Id::TEX: {