diff options
| author | 2016-12-12 21:44:15 -0800 | |
|---|---|---|
| committer | 2016-12-14 20:06:09 -0800 | |
| commit | 5ff32062076cb810f4fb1d20e1a0afd176b14bbb (patch) | |
| tree | a35d9a529d1991dc2236b352c35a2af781d4c6c9 /src | |
| parent | VideoCore: Convert x64 shader JIT to use Xbyak for assembly (diff) | |
| download | yuzu-5ff32062076cb810f4fb1d20e1a0afd176b14bbb.tar.gz yuzu-5ff32062076cb810f4fb1d20e1a0afd176b14bbb.tar.xz yuzu-5ff32062076cb810f4fb1d20e1a0afd176b14bbb.zip | |
shader_jit_x64: Use Reg32 for LOOP* registers, eliminating casts
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index a1f1f8d30..cfdeb8d6a 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -109,11 +109,11 @@ static const Reg64 SETUP = r9; | |||
| 109 | static const Reg64 ADDROFFS_REG_0 = r10; | 109 | static const Reg64 ADDROFFS_REG_0 = r10; |
| 110 | static const Reg64 ADDROFFS_REG_1 = r11; | 110 | static const Reg64 ADDROFFS_REG_1 = r11; |
| 111 | /// VS loop count register (Multiplied by 16) | 111 | /// VS loop count register (Multiplied by 16) |
| 112 | static const Reg64 LOOPCOUNT_REG = r12; | 112 | static const Reg32 LOOPCOUNT_REG = r12d; |
| 113 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) | 113 | /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) |
| 114 | static const Reg64 LOOPCOUNT = rsi; | 114 | static const Reg32 LOOPCOUNT = esi; |
| 115 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) | 115 | /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) |
| 116 | static const Reg64 LOOPINC = rdi; | 116 | static const Reg32 LOOPINC = edi; |
| 117 | /// Result of the previous CMP instruction for the X-component comparison | 117 | /// Result of the previous CMP instruction for the X-component comparison |
| 118 | static const Reg64 COND0 = r13; | 118 | static const Reg64 COND0 = r13; |
| 119 | /// Result of the previous CMP instruction for the Y-component comparison | 119 | /// Result of the previous CMP instruction for the Y-component comparison |
| @@ -734,24 +734,24 @@ void JitShader::Compile_LOOP(Instruction instr) { | |||
| 734 | // 4 bits) to be used as an offset into the 16-byte vector registers later | 734 | // 4 bits) to be used as an offset into the 16-byte vector registers later |
| 735 | size_t offset = | 735 | size_t offset = |
| 736 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); | 736 | ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); |
| 737 | mov(LOOPCOUNT.cvt32(), dword[SETUP + offset]); | 737 | mov(LOOPCOUNT, dword[SETUP + offset]); |
| 738 | mov(LOOPCOUNT_REG.cvt32(), LOOPCOUNT.cvt32()); | 738 | mov(LOOPCOUNT_REG, LOOPCOUNT); |
| 739 | shr(LOOPCOUNT_REG.cvt32(), 4); | 739 | shr(LOOPCOUNT_REG, 4); |
| 740 | and(LOOPCOUNT_REG.cvt32(), 0xFF0); // Y-component is the start | 740 | and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start |
| 741 | mov(LOOPINC.cvt32(), LOOPCOUNT.cvt32()); | 741 | mov(LOOPINC, LOOPCOUNT); |
| 742 | shr(LOOPINC.cvt32(), 12); | 742 | shr(LOOPINC, 12); |
| 743 | and(LOOPINC.cvt32(), 0xFF0); // Z-component is the incrementer | 743 | and(LOOPINC, 0xFF0); // Z-component is the incrementer |
| 744 | movzx(LOOPCOUNT.cvt32(), LOOPCOUNT.cvt8()); // X-component is iteration count | 744 | movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count |
| 745 | add(LOOPCOUNT.cvt32(), 1); // Iteration count is X-component + 1 | 745 | add(LOOPCOUNT, 1); // Iteration count is X-component + 1 |
| 746 | 746 | ||
| 747 | Label l_loop_start; | 747 | Label l_loop_start; |
| 748 | L(l_loop_start); | 748 | L(l_loop_start); |
| 749 | 749 | ||
| 750 | Compile_Block(instr.flow_control.dest_offset + 1); | 750 | Compile_Block(instr.flow_control.dest_offset + 1); |
| 751 | 751 | ||
| 752 | add(LOOPCOUNT_REG.cvt32(), LOOPINC.cvt32()); // Increment LOOPCOUNT_REG by Z-component | 752 | add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component |
| 753 | sub(LOOPCOUNT.cvt32(), 1); // Decrement loop count by 1 | 753 | sub(LOOPCOUNT, 1); // Decrement loop count by 1 |
| 754 | jnz(l_loop_start); // Loop if not equal | 754 | jnz(l_loop_start); // Loop if not equal |
| 755 | 755 | ||
| 756 | looping = false; | 756 | looping = false; |
| 757 | } | 757 | } |
| @@ -856,7 +856,7 @@ void JitShader::Compile() { | |||
| 856 | // Zero address/loop registers | 856 | // Zero address/loop registers |
| 857 | xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); | 857 | xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); |
| 858 | xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); | 858 | xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); |
| 859 | xor(LOOPCOUNT_REG.cvt32(), LOOPCOUNT_REG.cvt32()); | 859 | xor(LOOPCOUNT_REG, LOOPCOUNT_REG); |
| 860 | 860 | ||
| 861 | // Used to set a register to one | 861 | // Used to set a register to one |
| 862 | static const __m128 one = {1.f, 1.f, 1.f, 1.f}; | 862 | static const __m128 one = {1.f, 1.f, 1.f, 1.f}; |