summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: liamwhite, 2023-06-29 10:01:26 -0400
committer: GitHub, 2023-06-29 10:01:26 -0400
commit: 5e70db0d433054e18ab7432cafd2a87d68573642 (patch)
tree: 3ea86827d33b20a4083964feca7d7b532685efb9
parent: Merge pull request #10937 from german77/ring (diff)
parent: x64: cpu_wait: Implement MWAITX for non-MSVC compilers (diff)
download: yuzu-5e70db0d433054e18ab7432cafd2a87d68573642.tar.gz
yuzu-5e70db0d433054e18ab7432cafd2a87d68573642.tar.xz
yuzu-5e70db0d433054e18ab7432cafd2a87d68573642.zip
Merge pull request #10935 from Morph1984/mwaitx
x64: Make use of monitorx instructions for power efficient sleeps (AMD)
Diffstat (limited to '')
-rw-r--r-- src/common/telemetry.cpp 1
-rw-r--r-- src/common/x64/cpu_detect.cpp 1
-rw-r--r-- src/common/x64/cpu_detect.h 1
-rw-r--r-- src/common/x64/cpu_wait.cpp 52
4 files changed, 41 insertions, 14 deletions
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 91352912d..929ed67e4 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) {
93 add_field("CPU_Extension_x64_GFNI", caps.gfni); 93 add_field("CPU_Extension_x64_GFNI", caps.gfni);
94 add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); 94 add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
95 add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); 95 add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
96 add_field("CPU_Extension_x64_MONITORX", caps.monitorx);
96 add_field("CPU_Extension_x64_MOVBE", caps.movbe); 97 add_field("CPU_Extension_x64_MOVBE", caps.movbe);
97 add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); 98 add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
98 add_field("CPU_Extension_x64_POPCNT", caps.popcnt); 99 add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c998b1197..780120a5b 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -168,6 +168,7 @@ static CPUCaps Detect() {
168 __cpuid(cpu_id, 0x80000001); 168 __cpuid(cpu_id, 0x80000001);
169 caps.lzcnt = Common::Bit<5>(cpu_id[2]); 169 caps.lzcnt = Common::Bit<5>(cpu_id[2]);
170 caps.fma4 = Common::Bit<16>(cpu_id[2]); 170 caps.fma4 = Common::Bit<16>(cpu_id[2]);
171 caps.monitorx = Common::Bit<29>(cpu_id[2]);
171 } 172 }
172 173
173 if (max_ex_fn >= 0x80000007) { 174 if (max_ex_fn >= 0x80000007) {
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 8253944d6..756459417 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -63,6 +63,7 @@ struct CPUCaps {
63 bool gfni : 1; 63 bool gfni : 1;
64 bool invariant_tsc : 1; 64 bool invariant_tsc : 1;
65 bool lzcnt : 1; 65 bool lzcnt : 1;
66 bool monitorx : 1;
66 bool movbe : 1; 67 bool movbe : 1;
67 bool pclmulqdq : 1; 68 bool pclmulqdq : 1;
68 bool popcnt : 1; 69 bool popcnt : 1;
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index c53dd4945..41d385f59 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
@@ -13,36 +13,60 @@
13 13
14namespace Common::X64 { 14namespace Common::X64 {
15 15
16namespace {
17
18// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
19// For reference:
20// At 1 GHz, 100K cycles is 100us
21// At 2 GHz, 100K cycles is 50us
22// At 4 GHz, 100K cycles is 25us
23constexpr auto PauseCycles = 100'000U;
24
25} // Anonymous namespace
26
16#ifdef _MSC_VER 27#ifdef _MSC_VER
17__forceinline static void TPAUSE() { 28__forceinline static void TPAUSE() {
18 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 29 static constexpr auto RequestC02State = 0U;
19 // For reference: 30 _tpause(RequestC02State, FencedRDTSC() + PauseCycles);
20 // At 1 GHz, 100K cycles is 100us 31}
21 // At 2 GHz, 100K cycles is 50us 32
22 // At 4 GHz, 100K cycles is 25us 33__forceinline static void MWAITX() {
23 static constexpr auto PauseCycles = 100'000; 34 static constexpr auto EnableWaitTimeFlag = 1U << 1;
24 _tpause(0, FencedRDTSC() + PauseCycles); 35 static constexpr auto RequestC1State = 0U;
36
37 // monitor_var should be aligned to a cache line.
38 alignas(64) u64 monitor_var{};
39 _mm_monitorx(&monitor_var, 0, 0);
40 _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
25} 41}
26#else 42#else
27static void TPAUSE() { 43static void TPAUSE() {
28 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 44 static constexpr auto RequestC02State = 0U;
29 // For reference:
30 // At 1 GHz, 100K cycles is 100us
31 // At 2 GHz, 100K cycles is 50us
32 // At 4 GHz, 100K cycles is 25us
33 static constexpr auto PauseCycles = 100'000;
34 const auto tsc = FencedRDTSC() + PauseCycles; 45 const auto tsc = FencedRDTSC() + PauseCycles;
35 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); 46 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
36 const auto edx = static_cast<u32>(tsc >> 32); 47 const auto edx = static_cast<u32>(tsc >> 32);
37 asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); 48 asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax));
49}
50
51static void MWAITX() {
52 static constexpr auto EnableWaitTimeFlag = 1U << 1;
53 static constexpr auto RequestC1State = 0U;
54
55 // monitor_var should be aligned to a cache line.
56 alignas(64) u64 monitor_var{};
57 asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
58 asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
38} 59}
39#endif 60#endif
40 61
41void MicroSleep() { 62void MicroSleep() {
42 static const bool has_waitpkg = GetCPUCaps().waitpkg; 63 static const bool has_waitpkg = GetCPUCaps().waitpkg;
64 static const bool has_monitorx = GetCPUCaps().monitorx;
43 65
44 if (has_waitpkg) { 66 if (has_waitpkg) {
45 TPAUSE(); 67 TPAUSE();
68 } else if (has_monitorx) {
69 MWAITX();
46 } else { 70 } else {
47 std::this_thread::yield(); 71 std::this_thread::yield();
48 } 72 }