summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: bunnei, 2020-02-26 10:24:35 -0500
committer: GitHub, 2020-02-26 10:24:35 -0500
commit: 1f57f679a49873d324353e0cb238196e76650023 (patch)
tree: fac7ddc07206aa2fa565a7cba74c026105b93da3
parent: Merge pull request #3431 from CJBok/npad-fix (diff)
parent: nit: add const to where it need. (diff)
download: yuzu-1f57f679a49873d324353e0cb238196e76650023.tar.gz
yuzu-1f57f679a49873d324353e0cb238196e76650023.tar.xz
yuzu-1f57f679a49873d324353e0cb238196e76650023.zip
Merge pull request #3440 from namkazt/patch-6
shader: implement LOP3 fast replace for old function
Diffstat (limited to '')
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer.cpp | 94
1 files changed, 58 insertions, 36 deletions
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 21366869d..2fe787d6f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
293 293
294void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, 294void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
295 Node imm_lut, bool sets_cc) { 295 Node imm_lut, bool sets_cc) {
296 constexpr u32 lop_iterations = 32; 296 const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
297 const Node one = Immediate(1); 297 Node value = Immediate(0);
298 const Node two = Immediate(2); 298 const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
299 299 if (imm.GetValue() & 0x01) {
300 Node value; 300 const Node a = Operation(OperationCode::IBitwiseNot, na);
301 for (u32 i = 0; i < lop_iterations; ++i) { 301 const Node b = Operation(OperationCode::IBitwiseNot, nb);
302 const Node shift_amount = Immediate(i); 302 const Node c = Operation(OperationCode::IBitwiseNot, nc);
303 303 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
304 const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); 304 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
305 const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); 305 value = Operation(OperationCode::IBitwiseOr, value, r);
306
307 const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
308 const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
309 const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
310
311 const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
312 const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
313 const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
314
315 const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1);
316 const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2);
317
318 const Node shifted_bit =
319 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
320 const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
321
322 const Node right =
323 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
324
325 if (i > 0) {
326 value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
327 } else {
328 value = right;
329 } 306 }
330 } 307 if (imm.GetValue() & 0x02) {
308 const Node a = Operation(OperationCode::IBitwiseNot, na);
309 const Node b = Operation(OperationCode::IBitwiseNot, nb);
310 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
311 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
312 value = Operation(OperationCode::IBitwiseOr, value, r);
313 }
314 if (imm.GetValue() & 0x04) {
315 const Node a = Operation(OperationCode::IBitwiseNot, na);
316 const Node c = Operation(OperationCode::IBitwiseNot, nc);
317 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
318 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
319 value = Operation(OperationCode::IBitwiseOr, value, r);
320 }
321 if (imm.GetValue() & 0x08) {
322 const Node a = Operation(OperationCode::IBitwiseNot, na);
323 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
324 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
325 value = Operation(OperationCode::IBitwiseOr, value, r);
326 }
327 if (imm.GetValue() & 0x10) {
328 const Node b = Operation(OperationCode::IBitwiseNot, nb);
329 const Node c = Operation(OperationCode::IBitwiseNot, nc);
330 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
331 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
332 value = Operation(OperationCode::IBitwiseOr, value, r);
333 }
334 if (imm.GetValue() & 0x20) {
335 const Node b = Operation(OperationCode::IBitwiseNot, nb);
336 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
337 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
338 value = Operation(OperationCode::IBitwiseOr, value, r);
339 }
340 if (imm.GetValue() & 0x40) {
341 const Node c = Operation(OperationCode::IBitwiseNot, nc);
342 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
343 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
344 value = Operation(OperationCode::IBitwiseOr, value, r);
345 }
346 if (imm.GetValue() & 0x80) {
347 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
348 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
349 value = Operation(OperationCode::IBitwiseOr, value, r);
350 }
351 return value;
352 }(op_a, op_b, op_c, imm_lut);
331 353
332 SetInternalFlagsFromInteger(bb, value, sets_cc); 354 SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
333 SetRegister(bb, dest, value); 355 SetRegister(bb, dest, lop3_fast);
334} 356}
335 357
336} // namespace VideoCommon::Shader 358} // namespace VideoCommon::Shader