Skip to content

Commit e45c93a

Browse files
committed
[x86] expose cpuid and xgetbv intrinsics
1 parent 4c244fb commit e45c93a

File tree

4 files changed

+92
-35
lines changed

4 files changed

+92
-35
lines changed

src/x86/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
//! `x86` and `x86_64` intrinsics.
22
3+
pub use self::x86::*;
4+
35
pub use self::sse::*;
46
pub use self::sse2::*;
57
pub use self::sse3::*;
@@ -28,6 +30,8 @@ mod macros;
2830
#[macro_use]
2931
mod runtime;
3032

33+
mod x86;
34+
3135
mod sse;
3236
mod sse2;
3337
mod sse3;

src/x86/runtime.rs

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -159,32 +159,30 @@ fn test_bit(x: usize, bit: u32) -> bool {
159159
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
160160
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
161161
fn detect_features() -> usize {
162-
let extended_features_ebx;
163-
let proc_info_ecx;
164-
let proc_info_edx;
162+
use super::x86::{cpuid, CpuidResult};
165163

166-
unsafe {
167-
/// To obtain all feature flags we need two CPUID queries:
164+
// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits";
165+
// Contains information about most x86 features.
166+
let CpuidResult {
167+
ecx: proc_info_ecx,
168+
edx: proc_info_edx,
169+
..
170+
} = cpuid(0x0000_0001_u32, 0);
168171

169-
/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
170-
/// This gives us most of the CPU features in ECX and EDX (see
171-
/// below).
172-
asm!("cpuid"
173-
: "={ecx}"(proc_info_ecx), "={edx}"(proc_info_edx)
174-
: "{eax}"(0x0000_0001_u32), "{ecx}"(0 as u32)
175-
: :);
176-
177-
/// 2. EAX=7, ECX=0: Queries "Extended Features"
178-
/// This gives us information about bmi,bmi2, and avx2 support
179-
/// (see below); the result in ECX is not currently needed.
180-
asm!("cpuid"
181-
: "={ebx}"(extended_features_ebx)
182-
: "{eax}"(0x0000_0007_u32), "{ecx}"(0 as u32)
183-
: :);
184-
}
172+
// 2. EAX=7, ECX=0: Queries "Extended Features";
173+
// Contains information about bmi, bmi2, and avx2 support.
174+
let CpuidResult {
175+
ebx: extended_features_ebx,
176+
..
177+
} = cpuid(0x0000_0007_u32, 0);
185178

186179
let mut value: usize = 0;
187180

181+
let proc_info_ecx = proc_info_ecx as usize;
182+
let proc_info_edx = proc_info_edx as usize;
183+
184+
let extended_features_ebx = extended_features_ebx as usize;
185+
188186
if test_bit(extended_features_ebx, 3) {
189187
value = set_bit(value, __Feature::bmi as u32);
190188
}
@@ -233,18 +231,7 @@ fn detect_features() -> usize {
233231
// org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
234232
//
235233
if test_bit(proc_info_ecx, 26) && test_bit(proc_info_ecx, 27) {
236-
/// XGETBV: reads the contents of the extended control
237-
/// register (XCR).
238-
unsafe fn xgetbv(xcr_no: u32) -> u64 {
239-
let eax: u32;
240-
let edx: u32;
241-
// xgetbv
242-
asm!("xgetbv"
243-
: "={eax}"(eax), "={edx}"(edx)
244-
: "{ecx}"(xcr_no)
245-
: :);
246-
((edx as u64) << 32) | (eax as u64)
247-
}
234+
use super::x86::xgetbv;
248235

249236
// This is safe because bit 27 (OSXSAVE) of `proc_info_ecx`, tested above, reports that the OS has enabled `xgetbv`.
250237
if unsafe { xgetbv(0) } & 6 == 6 {

src/x86/sse2.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,9 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
17921792
#[inline(always)]
17931793
#[target_feature = "+sse2"]
17941794
#[cfg_attr(test, assert_instr(cvtsd2si))]
1795-
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
1795+
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
1796+
_mm_cvtsd_si64(a)
1797+
}
17961798

17971799
/// Convert the lower double-precision (64-bit) floating-point element in `b`
17981800
/// to a single-precision (32-bit) floating-point element, store the result in
@@ -1857,7 +1859,9 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
18571859
#[inline(always)]
18581860
#[target_feature = "+sse2"]
18591861
#[cfg_attr(test, assert_instr(cvttsd2si))]
1860-
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
1862+
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
1863+
_mm_cvttsd_si64(a)
1864+
}
18611865

18621866
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
18631867
/// packed 32-bit integers with truncation.

src/x86/x86.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//! x86 intrinsics available on all hosts
2+
3+
#[cfg(test)]
4+
use stdsimd_test::assert_instr;
5+
6+
/// Result of the `cpuid` instruction.
7+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CpuidResult {
    /// Value left in the `eax` register by `cpuid`.
    pub eax: u32,
    /// Value left in the `ebx` register by `cpuid`.
    pub ebx: u32,
    /// Value left in the `ecx` register by `cpuid`.
    pub ecx: u32,
    /// Value left in the `edx` register by `cpuid`.
    pub edx: u32,
}
13+
14+
/// CPUID instruction.
15+
///
16+
/// The `request` parameter is passed in the `eax` register and the `ext`
17+
/// parameter in the `ecx` register.
18+
///
19+
/// The [CPUID Wikipedia page][wiki_cpuid] contains
20+
/// all the information about which flags to set to query which values, and in
21+
/// which registers these are reported.
22+
///
23+
/// The definitive references are:
24+
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
25+
/// Instruction Set Reference, A-Z][intel64_ref].
26+
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
27+
/// System Instructions][amd64_ref].
28+
///
29+
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
30+
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
31+
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
32+
#[inline(always)]
33+
#[cfg_attr(test, assert_instr(cpuid))]
34+
pub fn cpuid(request: u32, ext: u32) -> CpuidResult {
35+
unsafe {
36+
let mut r = ::std::mem::uninitialized::<CpuidResult>();
37+
asm!("cpuid"
38+
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
39+
: "{eax}"(request), "{ecx}"(ext)
40+
: :);
41+
r
42+
}
43+
}
44+
45+
/// Reads the contents of the extended control register `XCR`
46+
/// specified in `xcr_no`.
47+
#[inline(always)]
48+
// FIXME: see
49+
// https://github.com/rust-lang-nursery/stdsimd/issues/167
50+
// #[target_feature = "+xsave"]
51+
#[cfg_attr(test, assert_instr(xgetbv))]
52+
pub unsafe fn xgetbv(xcr_no: u32) -> u64 {
53+
let eax: u32;
54+
let edx: u32;
55+
56+
asm!("xgetbv"
57+
: "={eax}"(eax), "={edx}"(edx)
58+
: "{ecx}"(xcr_no)
59+
: :);
60+
61+
((edx as u64) << 32) | (eax as u64)
62+
}

0 commit comments

Comments
 (0)