diff options
| author | 2016-12-02 20:24:24 -0800 | |
|---|---|---|
| committer | 2016-12-02 20:24:24 -0800 | |
| commit | 018191c1f01980d6a642b0ef1bd0a4cd636c0178 (patch) | |
| tree | f37a96f68ab0a1e9a19bb46649963ef1908ed352 | |
| parent | Merge pull request #2251 from JayFoxRox/remove-version (diff) | |
| parent | shader_jit: Load LOOPCOUNT_REG and LOOPINC 4 bit left-shifted (diff) | |
| download | yuzu-018191c1f01980d6a642b0ef1bd0a4cd636c0178.tar.gz yuzu-018191c1f01980d6a642b0ef1bd0a4cd636c0178.tar.xz yuzu-018191c1f01980d6a642b0ef1bd0a4cd636c0178.zip | |
Merge pull request #2255 from JayFoxRox/lsl4
shader_jit: Load LOOPCOUNT_REG and LOOPINC 4 bit left-shifted
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 15 |
1 file changed, 9 insertions, 6 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 211c703ab..9a3d6ca8f 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -102,11 +102,11 @@ static const X64Reg SETUP = R9; | |||
| 102 | /// The two 32-bit VS address offset registers set by the MOVA instruction | 102 | /// The two 32-bit VS address offset registers set by the MOVA instruction |
| 103 | static const X64Reg ADDROFFS_REG_0 = R10; | 103 | static const X64Reg ADDROFFS_REG_0 = R10; |
| 104 | static const X64Reg ADDROFFS_REG_1 = R11; | 104 | static const X64Reg ADDROFFS_REG_1 = R11; |
| 105 | /// VS loop count register | 105 | /// VS loop count register (Multiplied by 16) |
| 106 | static const X64Reg LOOPCOUNT_REG = R12; | 106 | static const X64Reg LOOPCOUNT_REG = R12; |
| 107 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) | 107 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) |
| 108 | static const X64Reg LOOPCOUNT = RSI; | 108 | static const X64Reg LOOPCOUNT = RSI; |
| 109 | /// Number to increment LOOPCOUNT_REG by on each loop iteration | 109 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) |
| 110 | static const X64Reg LOOPINC = RDI; | 110 | static const X64Reg LOOPINC = RDI; |
| 111 | /// Result of the previous CMP instruction for the X-component comparison | 111 | /// Result of the previous CMP instruction for the X-component comparison |
| 112 | static const X64Reg COND0 = R13; | 112 | static const X64Reg COND0 = R13; |
| @@ -718,15 +718,18 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
| 718 | 718 | ||
| 719 | looping = true; | 719 | looping = true; |
| 720 | 720 | ||
| 721 | // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id. | ||
| 722 | // The Y (LOOPCOUNT_REG) and Z (LOOPINC) components are kept multiplied by 16 (Left shifted by | ||
| 723 | // 4 bits) to be used as an offset into the 16-byte vector registers later | ||
| 721 | int offset = | 724 | int offset = |
| 722 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | 725 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); |
| 723 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); | 726 | MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); |
| 724 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); | 727 | MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); |
| 725 | SHR(32, R(LOOPCOUNT_REG), Imm8(8)); | 728 | SHR(32, R(LOOPCOUNT_REG), Imm8(4)); |
| 726 | AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start | 729 | AND(32, R(LOOPCOUNT_REG), Imm32(0xFF0)); // Y-component is the start |
| 727 | MOV(32, R(LOOPINC), R(LOOPCOUNT)); | 730 | MOV(32, R(LOOPINC), R(LOOPCOUNT)); |
| 728 | SHR(32, R(LOOPINC), Imm8(16)); | 731 | SHR(32, R(LOOPINC), Imm8(12)); |
| 729 | MOVZX(32, 8, LOOPINC, R(LOOPINC)); // Z-component is the incrementer | 732 | AND(32, R(LOOPINC), Imm32(0xFF0)); // Z-component is the incrementer |
| 730 | MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count | 733 | MOVZX(32, 8, LOOPCOUNT, R(LOOPCOUNT)); // X-component is iteration count |
| 731 | ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 | 734 | ADD(32, R(LOOPCOUNT), Imm8(1)); // Iteration count is X-component + 1 |
| 732 | 735 | ||