Skip to content

Commit 1fd0066

Browse files
committed
[x86] expose __cpuid, __cpuid_count, xgetbv, __readeflags, __writeeflags
Expose the `__cpuid` and `_xgetbv` `x86`/`x86_64` intrinsics. The `__cpuid` and `__cpuid_count` intrinsics are not available on all `x86` CPUs. The `has_cpuid() -> bool` intrinsic detects this on non-`x86_64` hosts. For convenience, it is exposed on `x86_64` as well, but there it always returns `true`. These are exposed by Clang and GCC. The `__readeflags` and `__writeeflags` intrinsics, which read/write the `EFLAGS` register and are required to implement `has_cpuid`, are exposed as well. GCC and Clang expose them too. When doing run-time feature detection for `x86`/`x86_64` we now properly check whether the `cpuid` instruction is available before using it. If it is not available, all features are reported as "not available". One TODO: - The `_xgetbv` intrinsic requires the `xsave` target feature but this is not currently exposed by rustc, see #167.
1 parent 4c244fb commit 1fd0066

File tree

6 files changed

+235
-36
lines changed

6 files changed

+235
-36
lines changed

src/x86/cpuid.rs

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//! `cpuid` intrinsics
2+
3+
#[cfg(test)]
4+
use stdsimd_test::assert_instr;
5+
6+
/// Result of the `cpuid` instruction.
///
/// Holds the values of the four general-purpose registers that
/// `__cpuid_count` reads back after executing `cpuid`.
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub struct CpuidResult {
    /// EAX register.
    pub eax: u32,
    /// EBX register.
    pub ebx: u32,
    /// ECX register.
    pub ecx: u32,
    /// EDX register.
    pub edx: u32,
}
19+
20+
/// `cpuid` instruction.
21+
///
22+
/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which
23+
/// information using the `eax` and `ecx` registers, and the format in
24+
/// which this information is returned in `eax...edx`.
25+
///
26+
/// The `has_cpuid()` intrinsics can be used to query whether the `cpuid`
27+
/// instruction is available.
28+
///
29+
/// The definitive references are:
30+
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
31+
/// Instruction Set Reference, A-Z][intel64_ref].
32+
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
33+
/// System Instructions][amd64_ref].
34+
///
35+
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
36+
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
37+
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
38+
#[inline(always)]
39+
#[cfg_attr(test, assert_instr(cpuid))]
40+
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
41+
pub unsafe fn __cpuid_count(eax: u32, ecx: u32) -> CpuidResult {
42+
let mut r = ::std::mem::uninitialized::<CpuidResult>();
43+
asm!("cpuid"
44+
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
45+
: "{eax}"(eax), "{ecx}"(ecx)
46+
: :);
47+
r
48+
}
49+
50+
/// `cpuid` instruction.
51+
///
52+
/// See `__cpuid_count`.
53+
#[inline(always)]
54+
#[cfg_attr(test, assert_instr(cpuid))]
55+
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
56+
pub unsafe fn __cpuid(eax: u32) -> CpuidResult {
57+
__cpuid_count(eax, 0)
58+
}
59+
60+
/// Does the host support the `cpuid` instruction?
///
/// On `x86_64` this always returns `true`.
///
/// On `x86` the instruction is not always available. Availability is probed
/// by trying to flip the ID bit (bit 21) of `EFLAGS`: if the flipped value
/// sticks, the CPU supports `cpuid`.
///
/// Note that on `x86` the flipped ID bit is left in `EFLAGS`; the original
/// value is not written back.
#[inline(always)]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
pub fn has_cpuid() -> bool {
    #[cfg(target_arch = "x86_64")]
    {
        true
    }
    #[cfg(target_arch = "x86")]
    {
        use super::ia32::{__readeflags, __writeeflags};

        // ID flag: bit 21 of EFLAGS.
        const ID_BIT: u32 = 0x0020_0000;

        // On `x86` the `cpuid` instruction is not always available.
        // This follows the approach indicated in:
        // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
        unsafe {
            // Read EFLAGS:
            let eflags: u32 = __readeflags();

            // Invert the ID bit in EFLAGS. This must be an XOR: OR-ing the
            // bit in would be a no-op whenever the bit is already set, and
            // the check below would then wrongly report "no cpuid".
            let eflags_mod: u32 = eflags ^ ID_BIT;

            // Store the modified EFLAGS (ID bit may or may not be inverted)
            __writeeflags(eflags_mod);

            // Read EFLAGS again:
            let eflags_after: u32 = __readeflags();

            // Check if the ID bit changed; other flag bits are ignored so
            // that unrelated flag updates cannot influence the result.
            (eflags_after ^ eflags) & ID_BIT != 0
        }
    }
}
93+
94+
#[cfg(test)]
95+
mod tests {
96+
use super::*;
97+
98+
// Checks that `has_cpuid` reports `true` on the targets the test suite
// currently runs on.
#[test]
99+
fn test_always_has_cpuid() {
100+
// all currently-tested targets have the instruction
101+
// FIXME: add targets without `cpuid` to CI
102+
assert!(has_cpuid());
103+
}
104+
105+
// `x86`-only: `has_cpuid` writes a modified EFLAGS value and does not
// restore the original one, so the EFLAGS value read after the call is
// expected to differ from `before` exactly when `has_cpuid` returns `true`.
#[cfg(target_arch = "x86")]
106+
#[test]
107+
fn test_has_cpuid() {
108+
use vendor::__readeflags;
109+
unsafe {
110+
// Snapshot of EFLAGS taken before probing.
let before = __readeflags();
111+
112+
if has_cpuid() {
113+
assert!(before != __readeflags());
114+
} else {
115+
assert!(before == __readeflags());
116+
}
117+
}
118+
}
119+
120+
}

src/x86/ia32.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//! `i386/ia32` intrinsics
2+
3+
/// Reads EFLAGS.
///
/// `x86` variant: pushes the flags register onto the stack with `pushfd`
/// and pops it into the returned `u32`.
4+
#[cfg(target_arch = "x86")]
5+
#[inline(always)]
6+
pub unsafe fn __readeflags() -> u32 {
7+
let eflags: u32;
8+
// Marked "volatile" in the asm options.
asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile");
9+
eflags
10+
}
11+
12+
/// Reads EFLAGS.
///
/// `x86_64` variant: pushes the flags register onto the stack with `pushfq`
/// and pops the 64-bit value into the returned `u64`.
13+
#[cfg(target_arch = "x86_64")]
14+
#[inline(always)]
15+
pub unsafe fn __readeflags() -> u64 {
16+
let eflags: u64;
17+
// Marked "volatile" in the asm options.
asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile");
18+
eflags
19+
}
20+
21+
/// Writes EFLAGS.
///
/// `x86` variant: pushes `eflags` onto the stack and pops it into the flags
/// register with `popfd`.
22+
#[cfg(target_arch = "x86")]
23+
#[inline(always)]
24+
pub unsafe fn __writeeflags(eflags: u32) {
25+
// The flags are declared as clobbered ("cc", "flags") because the
// statement overwrites them.
asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile");
26+
}
27+
28+
/// Writes EFLAGS.
///
/// `x86_64` variant: pushes the 64-bit `eflags` value onto the stack and
/// pops it into the flags register with `popfq`.
29+
#[cfg(target_arch = "x86_64")]
30+
#[inline(always)]
31+
pub unsafe fn __writeeflags(eflags: u64) {
32+
// The flags are declared as clobbered ("cc", "flags") because the
// statement overwrites them.
asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile");
33+
}
34+
35+
#[cfg(test)]
36+
mod tests {
37+
use super::*;
38+
39+
// Round-trip check for `__readeflags` / `__writeeflags`.
#[test]
40+
fn test_eflags() {
41+
unsafe {
42+
// Read EFLAGS, write the same value back, read it again,
43+
// and compare both reads for equality:
44+
let v = __readeflags();
45+
__writeeflags(v);
46+
let u = __readeflags();
47+
assert_eq!(v, u);
48+
}
49+
}
50+
}

src/x86/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
//! `x86` and `x86_64` intrinsics.
22
3+
pub use self::ia32::*;
4+
pub use self::cpuid::*;
5+
pub use self::xsave::*;
6+
37
pub use self::sse::*;
48
pub use self::sse2::*;
59
pub use self::sse3::*;
@@ -28,6 +32,10 @@ mod macros;
2832
#[macro_use]
2933
mod runtime;
3034

35+
mod ia32;
36+
mod cpuid;
37+
mod xsave;
38+
3139
mod sse;
3240
mod sse2;
3341
mod sse3;

src/x86/runtime.rs

Lines changed: 29 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -159,31 +159,37 @@ fn test_bit(x: usize, bit: u32) -> bool {
159159
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
160160
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
161161
fn detect_features() -> usize {
162-
let extended_features_ebx;
163-
let proc_info_ecx;
164-
let proc_info_edx;
162+
use super::cpuid::{__cpuid, has_cpuid, CpuidResult};
163+
let mut value: usize = 0;
165164

166-
unsafe {
167-
/// To obtain all feature flags we need two CPUID queries:
165+
// If the x86 CPU does not support the CPUID instruction then it is too
166+
// old to support any of the currently-detectable features.
167+
if !has_cpuid() {
168+
return value;
169+
}
168170

169-
/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
170-
/// This gives us most of the CPU features in ECX and EDX (see
171-
/// below).
172-
asm!("cpuid"
173-
: "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx)
174-
: "{eax}"(0x0000_0001_u32), "{ecx}"(0 as u32)
175-
: :);
171+
// Calling `cpuid` from here on is safe because the CPU has the `cpuid`
172+
// instruction.
176173

177-
/// 2. EAX=7, ECX=0: Queries "Extended Features"
178-
/// This gives us information about bmi,bmi2, and avx2 support
179-
/// (see below); the result in ECX is not currently needed.
180-
asm!("cpuid"
181-
: "={ebx}"(extended_features_ebx)
182-
: "{eax}"(0x0000_0007_u32), "{ecx}"(0 as u32)
183-
: :);
184-
}
174+
// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits";
175+
// Contains information about most x86 features.
176+
let CpuidResult {
177+
ecx: proc_info_ecx,
178+
edx: proc_info_edx,
179+
..
180+
} = unsafe { __cpuid(0x0000_0001_u32) };
185181

186-
let mut value: usize = 0;
182+
// 2. EAX=7, ECX=0: Queries "Extended Features";
183+
// Contains information about bmi,bmi2, and avx2 support.
184+
let CpuidResult {
185+
ebx: extended_features_ebx,
186+
..
187+
} = unsafe { __cpuid(0x0000_0007_u32) };
188+
189+
let proc_info_ecx = proc_info_ecx as usize;
190+
let proc_info_edx = proc_info_edx as usize;
191+
192+
let extended_features_ebx = extended_features_ebx as usize;
187193

188194
if test_bit(extended_features_ebx, 3) {
189195
value = set_bit(value, __Feature::bmi as u32);
@@ -233,21 +239,10 @@ fn detect_features() -> usize {
233239
// org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
234240
//
235241
if test_bit(proc_info_ecx, 26) && test_bit(proc_info_ecx, 27) {
236-
/// XGETBV: reads the contents of the extended control
237-
/// register (XCR).
238-
unsafe fn xgetbv(xcr_no: u32) -> u64 {
239-
let eax: u32;
240-
let edx: u32;
241-
// xgetbv
242-
asm!("xgetbv"
243-
: "={eax}"(eax), "={edx}"(edx)
244-
: "{ecx}"(xcr_no)
245-
: :);
246-
((edx as u64) << 32) | (eax as u64)
247-
}
242+
use super::xsave::_xgetbv;
248243

249244
// This is safe because on x86 `xgetbv` is always available.
250-
if unsafe { xgetbv(0) } & 6 == 6 {
245+
if unsafe { _xgetbv(0) } & 6 == 6 {
251246
if test_bit(proc_info_ecx, 28) {
252247
value = set_bit(value, __Feature::avx as u32);
253248
}

src/x86/sse2.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
17921792
#[inline(always)]
17931793
#[target_feature = "+sse2"]
17941794
#[cfg_attr(test, assert_instr(cvtsd2si))]
1795-
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
1795+
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
1796+
_mm_cvtsd_si64(a)
1797+
}
17961798

17971799
/// Convert the lower double-precision (64-bit) floating-point element in `b`
17981800
/// to a single-precision (32-bit) floating-point element, store the result in
@@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
18571859
#[inline(always)]
18581860
#[target_feature = "+sse2"]
18591861
#[cfg_attr(test, assert_instr(cvttsd2si))]
1860-
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
1862+
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
1863+
_mm_cvttsd_si64(a)
1864+
}
18611865

18621866
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
18631867
/// packed 32-bit integers with truncation.

src/x86/xsave.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//! `xsave` target feature intrinsics
2+
3+
#[cfg(test)]
4+
use stdsimd_test::assert_instr;
5+
6+
/// Reads the contents of the extended control register `XCR`
7+
/// specified in `xcr_no`.
///
/// `xcr_no` is passed to the instruction in `ecx`; the 64-bit result is
/// assembled from `edx` (upper 32 bits) and `eax` (lower 32 bits).
8+
#[inline(always)]
9+
// #[target_feature = "+xsave"] // FIXME: see
10+
// https://github.com/rust-lang-nursery/stdsimd/issues/167
11+
#[cfg_attr(test, assert_instr(xgetbv))]
12+
pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
13+
// Output halves of the result, written by the asm statement below.
let eax: u32;
14+
let edx: u32;
15+
16+
asm!("xgetbv"
17+
: "={eax}"(eax), "={edx}"(edx)
18+
: "{ecx}"(xcr_no)
19+
: :);
20+
21+
// Combine EDX:EAX into a single 64-bit value.
((edx as u64) << 32) | (eax as u64)
22+
}

0 commit comments

Comments
 (0)