summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author: Jannik Vogel, 2016-11-30 15:40:42 +0100
committer: Jannik Vogel, 2016-12-02 04:33:15 +0100
commit e2cb7d78332703ef4e4e119c9ed586334e8e4f30 (patch)
tree f37a96f68ab0a1e9a19bb46649963ef1908ed352 /src
parent Merge pull request #2251 from JayFoxRox/remove-version (diff)
download yuzu-e2cb7d78332703ef4e4e119c9ed586334e8e4f30.tar.gz
yuzu-e2cb7d78332703ef4e4e119c9ed586334e8e4f30.tar.xz
yuzu-e2cb7d78332703ef4e4e119c9ed586334e8e4f30.zip
shader_jit: Load LOOPCOUNT_REG and LOOPINC 4 bit left-shifted
Diffstat (limited to '')
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 15
1 files changed, 9 insertions, 6 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 211c703ab..9a3d6ca8f 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,11 +102,11 @@ static const X64Reg SETUP = R9;
102/// The two 32-bit VS address offset registers set by the MOVA instruction 102/// The two 32-bit VS address offset registers set by the MOVA instruction
103static const X64Reg ADDROFFS_REG_0 = R10; 103static const X64Reg ADDROFFS_REG_0 = R10;
104static const X64Reg ADDROFFS_REG_1 = R11; 104static const X64Reg ADDROFFS_REG_1 = R11;
105/// VS loop count register 105/// VS loop count register (Multiplied by 16)
106static const X64Reg LOOPCOUNT_REG = R12; 106static const X64Reg LOOPCOUNT_REG = R12;
107/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker) 107/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
108static const X64Reg LOOPCOUNT = RSI; 108static const X64Reg LOOPCOUNT = RSI;
109/// Number to increment LOOPCOUNT_REG by on each loop iteration 109/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
110static const X64Reg LOOPINC = RDI; 110static const X64Reg LOOPINC = RDI;
111/// Result of the previous CMP instruction for the X-component comparison 111/// Result of the previous CMP instruction for the X-component comparison
112static const X64Reg COND0 = R13; 112static const X64Reg COND0 = R13;
@@ -718,15 +718,18 @@ void JitShader::Compile_LOOP(Instruction instr) {
718 718
719 looping = true; 719 looping = true;
720 720
721 // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
722 // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
723 // 4 bits) to be used as an offset into the 16-byte vector registers later
721 int offset = 724 int offset =
722 ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); 725 ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
723 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); 726 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
724 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 727 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
725 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 728 SHR(32, R(LOOPCOUNT_REG), Imm8(4));
726 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 729 AND(32, R(LOOPCOUNT_REG), Imm32(0xFF0)); // Y-component is the start
727 MOV(32, R(LOOPINC), R(LOOPCOUNT)); 730 MOV(32, R(LOOPINC), R(LOOPCOUNT));
728 SHR(32, R(LOOPINC), Imm8(16)); 731 SHR(32, R(LOOPINC), Imm8(12));
729 MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer 732 AND(32, R(LOOPINC), Imm32(0xFF0)); // Z-component is the incrementer
730 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count 733 MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count
731 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 734 ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1
732 735