summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-03-28 18:49:50 -0300
committerGravatar ReinUsesLisp2020-04-12 00:34:42 -0300
commit76f178ba6e7cc2925ffada341d1e14fb159e93c7 (patch)
treefec59062ca81807bd8ac4dc8e0216fe2f5c27384 /src
parentshader_bytecode: Fix I2I_IMM encoding (diff)
downloadyuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.tar.gz
yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.tar.xz
yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.zip
shader/video: Partially implement VMNMX
Implements the common usages of VMNMX. Inputs with a size other than 32 bits are not supported, and sign mismatches aren't supported either. VMNMX works as follows: it takes Ra and Rb and applies a maximum or minimum to them (selected by .MX), taking the input sign into account. This result can then be saturated. After the intermediate result is calculated, another operation is applied to it using Rc. This operation is a merge, an accumulation or another min/max pass. Among other things, this instruction makes it possible to implement GCN's min3 and max3 instructions in a more flexible way.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/shader_bytecode.h55
-rw-r--r--src/video_core/shader/decode/video.cpp58
-rw-r--r--src/video_core/shader/shader_ir.h3
3 files changed, 116 insertions, 0 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index fcb2d7935..7400e1aa9 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -302,6 +302,23 @@ enum class VmadShr : u64 {
302 Shr15 = 2, 302 Shr15 = 2,
303}; 303};
304 304
305enum class VmnmxType : u64 {
306 Bits8,
307 Bits16,
308 Bits32,
309};
310
311enum class VmnmxOperation : u64 {
312 Mrg_16H = 0,
313 Mrg_16L = 1,
314 Mrg_8B0 = 2,
315 Mrg_8B2 = 3,
316 Acc = 4,
317 Min = 5,
318 Max = 6,
319 Nop = 7,
320};
321
305enum class XmadMode : u64 { 322enum class XmadMode : u64 {
306 None = 0, 323 None = 0,
307 CLo = 1, 324 CLo = 1,
@@ -1663,6 +1680,42 @@ union Instruction {
1663 } vmad; 1680 } vmad;
1664 1681
1665 union { 1682 union {
1683 BitField<54, 1, u64> is_dest_signed;
1684 BitField<48, 1, u64> is_src_a_signed;
1685 BitField<49, 1, u64> is_src_b_signed;
1686 BitField<37, 2, u64> src_format_a;
1687 BitField<29, 2, u64> src_format_b;
1688 BitField<56, 1, u64> mx;
1689 BitField<55, 1, u64> sat;
1690 BitField<36, 2, u64> selector_a;
1691 BitField<28, 2, u64> selector_b;
1692 BitField<50, 1, u64> is_op_b_register;
1693 BitField<51, 3, VmnmxOperation> operation;
1694
1695 VmnmxType SourceFormatA() const {
1696 switch (src_format_a) {
1697 case 0b11:
1698 return VmnmxType::Bits32;
1699 case 0b10:
1700 return VmnmxType::Bits16;
1701 default:
1702 return VmnmxType::Bits8;
1703 }
1704 }
1705
1706 VmnmxType SourceFormatB() const {
1707 switch (src_format_b) {
1708 case 0b11:
1709 return VmnmxType::Bits32;
1710 case 0b10:
1711 return VmnmxType::Bits16;
1712 default:
1713 return VmnmxType::Bits8;
1714 }
1715 }
1716 } vmnmx;
1717
1718 union {
1666 BitField<20, 16, u64> imm20_16; 1719 BitField<20, 16, u64> imm20_16;
1667 BitField<35, 1, u64> high_b_rr; // used on RR 1720 BitField<35, 1, u64> high_b_rr; // used on RR
1668 BitField<36, 1, u64> product_shift_left; 1721 BitField<36, 1, u64> product_shift_left;
@@ -1773,6 +1826,7 @@ public:
1773 MEMBAR, 1826 MEMBAR,
1774 VMAD, 1827 VMAD,
1775 VSETP, 1828 VSETP,
1829 VMNMX,
1776 FFMA_IMM, // Fused Multiply and Add 1830 FFMA_IMM, // Fused Multiply and Add
1777 FFMA_CR, 1831 FFMA_CR,
1778 FFMA_RC, 1832 FFMA_RC,
@@ -2078,6 +2132,7 @@ private:
2078 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), 2132 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2079 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), 2133 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2080 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), 2134 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
2135 INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2081 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), 2136 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2082 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), 2137 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2083 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), 2138 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
10 10
11namespace VideoCommon::Shader { 11namespace VideoCommon::Shader {
12 12
13using std::move;
13using Tegra::Shader::Instruction; 14using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 15using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 16using Tegra::Shader::Pred;
16using Tegra::Shader::VideoType; 17using Tegra::Shader::VideoType;
17using Tegra::Shader::VmadShr; 18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
18 21
19u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { 22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 23 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 24 const auto opcode = OpCode::Decode(instr);
22 25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
23 const Node op_a = 31 const Node op_a =
24 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, 32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
25 instr.video.type_a, instr.video.byte_height_a); 33 instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
109 } 117 }
110} 118}
111 119
120void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
121 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
124 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
125 UNIMPLEMENTED_IF(instr.vmnmx.sat);
126 UNIMPLEMENTED_IF(instr.generates_cc);
127
128 Node op_a = GetRegister(instr.gpr8);
129 Node op_b = GetRegister(instr.gpr20);
130 Node op_c = GetRegister(instr.gpr39);
131
132 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
133 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
134
135 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
136 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
137
138 switch (instr.vmnmx.operation) {
139 case VmnmxOperation::Mrg_16H:
140 value = BitfieldInsert(move(op_c), move(value), 16, 16);
141 break;
142 case VmnmxOperation::Mrg_16L:
143 value = BitfieldInsert(move(op_c), move(value), 0, 16);
144 break;
145 case VmnmxOperation::Mrg_8B0:
146 value = BitfieldInsert(move(op_c), move(value), 0, 8);
147 break;
148 case VmnmxOperation::Mrg_8B2:
149 value = BitfieldInsert(move(op_c), move(value), 16, 8);
150 break;
151 case VmnmxOperation::Acc:
152 value = Operation(OperationCode::IAdd, move(value), move(op_c));
153 break;
154 case VmnmxOperation::Min:
155 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
156 break;
157 case VmnmxOperation::Max:
158 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
159 break;
160 case VmnmxOperation::Nop:
161 break;
162 default:
163 UNREACHABLE();
164 break;
165 }
166
167 SetRegister(bb, instr.gpr0, move(value));
168}
169
112} // namespace VideoCommon::Shader 170} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 80fc9b82c..0f1ebef1b 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -350,6 +350,9 @@ private:
350 /// Marks the usage of a input or output attribute. 350 /// Marks the usage of a input or output attribute.
351 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); 351 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
352 352
353 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
/// The implementation in decode/video.cpp is partial: it only handles 32-bit register
/// sources with matching signedness, without saturation or condition code generation.
/// @param bb    Basic block the decoded instruction is inserted into.
/// @param instr Raw instruction whose vmnmx fields are decoded.
354 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
355
353 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 356 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
354 const Node4& components); 357 const Node4& components);
355 358