diff options
Diffstat (limited to 'src/common')
| -rw-r--r-- | src/common/telemetry.cpp | 1 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 5 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.h | 1 | ||||
| -rw-r--r-- | src/common/x64/xbyak_abi.h | 95 |
4 files changed, 36 insertions, 66 deletions
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 200c6489a..16d42facd 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp | |||
| @@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) { | |||
| 60 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); | 60 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); |
| 61 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); | 61 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); |
| 62 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); | 62 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); |
| 63 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512); | ||
| 63 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); | 64 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); |
| 64 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); | 65 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); |
| 65 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); | 66 | fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..f35dcb498 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -110,6 +110,11 @@ static CPUCaps Detect() { | |||
| 110 | caps.bmi1 = true; | 110 | caps.bmi1 = true; |
| 111 | if ((cpu_id[1] >> 8) & 1) | 111 | if ((cpu_id[1] >> 8) & 1) |
| 112 | caps.bmi2 = true; | 112 | caps.bmi2 = true; |
| 113 | // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) | ||
| 114 | if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && | ||
| 115 | (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { | ||
| 116 | caps.avx512 = caps.avx2; | ||
| 117 | } | ||
| 113 | } | 118 | } |
| 114 | } | 119 | } |
| 115 | 120 | ||
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..7606c3f7b 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h | |||
| @@ -19,6 +19,7 @@ struct CPUCaps { | |||
| 19 | bool lzcnt; | 19 | bool lzcnt; |
| 20 | bool avx; | 20 | bool avx; |
| 21 | bool avx2; | 21 | bool avx2; |
| 22 | bool avx512; | ||
| 22 | bool bmi1; | 23 | bool bmi1; |
| 23 | bool bmi2; | 24 | bool bmi2; |
| 24 | bool fma; | 25 | bool fma; |
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | namespace Common::X64 { | 12 | namespace Common::X64 { |
| 13 | 13 | ||
| 14 | inline int RegToIndex(const Xbyak::Reg& reg) { | 14 | inline std::size_t RegToIndex(const Xbyak::Reg& reg) { |
| 15 | using Kind = Xbyak::Reg::Kind; | 15 | using Kind = Xbyak::Reg::Kind; |
| 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, | 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, |
| 17 | "RegSet only support GPRs and XMM registers."); | 17 | "RegSet only support GPRs and XMM registers."); |
| @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { | |||
| 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); | 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | inline Xbyak::Reg64 IndexToReg64(int reg_index) { | 22 | inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { |
| 23 | ASSERT(reg_index < 16); | 23 | ASSERT(reg_index < 16); |
| 24 | return Xbyak::Reg64(reg_index); | 24 | return Xbyak::Reg64(static_cast<int>(reg_index)); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | inline Xbyak::Xmm IndexToXmm(int reg_index) { | 27 | inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { |
| 28 | ASSERT(reg_index >= 16 && reg_index < 32); | 28 | ASSERT(reg_index >= 16 && reg_index < 32); |
| 29 | return Xbyak::Xmm(reg_index - 16); | 29 | return Xbyak::Xmm(static_cast<int>(reg_index - 16)); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | inline Xbyak::Reg IndexToReg(int reg_index) { | 32 | inline Xbyak::Reg IndexToReg(std::size_t reg_index) { |
| 33 | if (reg_index < 16) { | 33 | if (reg_index < 16) { |
| 34 | return IndexToReg64(reg_index); | 34 | return IndexToReg64(reg_index); |
| 35 | } else { | 35 | } else { |
| @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; | |||
| 151 | 151 | ||
| 152 | #endif | 152 | #endif |
| 153 | 153 | ||
| 154 | inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | 154 | struct ABIFrameInfo { |
| 155 | size_t needed_frame_size, s32* out_subtraction, | 155 | s32 subtraction; |
| 156 | s32* out_xmm_offset) { | 156 | s32 xmm_offset; |
| 157 | }; | ||
| 158 | |||
| 159 | inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | ||
| 160 | size_t needed_frame_size) { | ||
| 157 | const auto count = (regs & ABI_ALL_GPRS).count(); | 161 | const auto count = (regs & ABI_ALL_GPRS).count(); |
| 158 | rsp_alignment -= count * 8; | 162 | rsp_alignment -= count * 8; |
| 159 | size_t subtraction = 0; | 163 | size_t subtraction = 0; |
| @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | |||
| 170 | rsp_alignment -= subtraction; | 174 | rsp_alignment -= subtraction; |
| 171 | subtraction += rsp_alignment & 0xF; | 175 | subtraction += rsp_alignment & 0xF; |
| 172 | 176 | ||
| 173 | *out_subtraction = (s32)subtraction; | 177 | return ABIFrameInfo{static_cast<s32>(subtraction), |
| 174 | *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); | 178 | static_cast<s32>(subtraction - xmm_base_subtraction)}; |
| 175 | } | 179 | } |
| 176 | 180 | ||
| 177 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | 181 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, |
| 178 | size_t rsp_alignment, size_t needed_frame_size = 0) { | 182 | size_t rsp_alignment, size_t needed_frame_size = 0) { |
| 179 | s32 subtraction, xmm_offset; | 183 | auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); |
| 180 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | 184 | |
| 181 | for (std::size_t i = 0; i < regs.size(); ++i) { | 185 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 182 | if (regs[i] && ABI_ALL_GPRS[i]) { | 186 | if (regs[i] && ABI_ALL_GPRS[i]) { |
| 183 | code.push(IndexToReg64(static_cast<int>(i))); | 187 | code.push(IndexToReg64(i)); |
| 184 | } | 188 | } |
| 185 | } | 189 | } |
| 186 | if (subtraction != 0) { | ||
| 187 | code.sub(code.rsp, subtraction); | ||
| 188 | } | ||
| 189 | 190 | ||
| 190 | for (int i = 0; i < regs.count(); i++) { | 191 | if (frame_info.subtraction != 0) { |
| 191 | if (regs.test(i) & ABI_ALL_GPRS.test(i)) { | 192 | code.sub(code.rsp, frame_info.subtraction); |
| 192 | code.push(IndexToReg64(i)); | ||
| 193 | } | ||
| 194 | } | 193 | } |
| 195 | 194 | ||
| 196 | for (std::size_t i = 0; i < regs.size(); ++i) { | 195 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 197 | if (regs[i] && ABI_ALL_XMMS[i]) { | 196 | if (regs[i] && ABI_ALL_XMMS[i]) { |
| 198 | code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); | 197 | code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); |
| 199 | xmm_offset += 0x10; | 198 | frame_info.xmm_offset += 0x10; |
| 200 | } | 199 | } |
| 201 | } | 200 | } |
| 202 | 201 | ||
| @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b | |||
| 205 | 204 | ||
| 206 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | 205 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, |
| 207 | size_t rsp_alignment, size_t needed_frame_size = 0) { | 206 | size_t rsp_alignment, size_t needed_frame_size = 0) { |
| 208 | s32 subtraction, xmm_offset; | 207 | auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); |
| 209 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 210 | 208 | ||
| 211 | for (std::size_t i = 0; i < regs.size(); ++i) { | 209 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 212 | if (regs[i] && ABI_ALL_XMMS[i]) { | 210 | if (regs[i] && ABI_ALL_XMMS[i]) { |
| 213 | code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); | 211 | code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); |
| 214 | xmm_offset += 0x10; | 212 | frame_info.xmm_offset += 0x10; |
| 215 | } | 213 | } |
| 216 | } | 214 | } |
| 217 | 215 | ||
| 218 | if (subtraction != 0) { | 216 | if (frame_info.subtraction != 0) { |
| 219 | code.add(code.rsp, subtraction); | 217 | code.add(code.rsp, frame_info.subtraction); |
| 220 | } | 218 | } |
| 221 | 219 | ||
| 222 | // GPRs need to be popped in reverse order | 220 | // GPRs need to be popped in reverse order |
| 223 | for (int i = 15; i >= 0; i--) { | 221 | for (std::size_t j = 0; j < regs.size(); ++j) { |
| 224 | if (regs[i]) { | 222 | const std::size_t i = regs.size() - j - 1; |
| 225 | code.pop(IndexToReg64(i)); | ||
| 226 | } | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 231 | size_t rsp_alignment, | ||
| 232 | size_t needed_frame_size = 0) { | ||
| 233 | s32 subtraction, xmm_offset; | ||
| 234 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 235 | |||
| 236 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 237 | if (regs[i] && ABI_ALL_GPRS[i]) { | 223 | if (regs[i] && ABI_ALL_GPRS[i]) { |
| 238 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | if (subtraction != 0) { | ||
| 243 | code.sub(code.rsp, subtraction); | ||
| 244 | } | ||
| 245 | |||
| 246 | return ABI_SHADOW_SPACE; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 250 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 251 | s32 subtraction, xmm_offset; | ||
| 252 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 253 | |||
| 254 | if (subtraction != 0) { | ||
| 255 | code.add(code.rsp, subtraction); | ||
| 256 | } | ||
| 257 | |||
| 258 | // GPRs need to be popped in reverse order | ||
| 259 | for (int i = 15; i >= 0; i--) { | ||
| 260 | if (regs[i]) { | ||
| 261 | code.pop(IndexToReg64(i)); | 224 | code.pop(IndexToReg64(i)); |
| 262 | } | 225 | } |
| 263 | } | 226 | } |