Skip to content

Commit d985a84

Browse files
gnzlbgalexcrichton
authored andcommitted
refactor the x86 module (rust-lang#195)
* refactor the x86 module * document the i686 check * document strict and intel_sde feature * document nvptx module
1 parent c68f66a commit d985a84

29 files changed

+873
-742
lines changed

Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ stdsimd-test = { version = "0.*", path = "stdsimd-test" }
3131
cupid = "0.4.0"
3232

3333
[features]
34-
strict = []
3534
std = []
35+
36+
# Internal-only: denies all warnings.
37+
strict = []
38+
# Internal-only: enables only those intrinsics supported by Intel's
39+
# Software Development Environment (SDE).
3640
intel_sde = []

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ case ${TARGET} in
1616
esac
1717

1818
FEATURES="strict,$FEATURES"
19-
FEATURES_STD="${FEATURES},std"
19+
FEATURES_STD="$std,${FEATURES}"
2020

2121
echo "RUSTFLAGS=${RUSTFLAGS}"
2222
echo "FEATURES=${FEATURES}"

src/nvptx/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
1-
//! nvptx intrinsics
1+
//! NVPTX intrinsics (experimental)
2+
//!
3+
//! These intrinsics form the foundation of the CUDA
4+
//! programming model.
5+
//!
6+
//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also the [LLVM NVPTX Backend documentation][llvm_docs].
7+
//!
8+
//! [cuda_c]:
9+
//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
10+
//! [llvm_docs]:
11+
//! https://llvm.org/docs/NVPTXUsage.html
212
313
#[allow(improper_ctypes)]
414
extern "C" {

src/x86/ia32.rs renamed to src/x86/i386/eflags.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//! `i386/ia32` intrinsics
1+
//! `i386` intrinsics
22
33
/// Reads EFLAGS.
44
#[cfg(target_arch = "x86")]
@@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {
3434

3535
#[cfg(test)]
3636
mod tests {
37-
use super::*;
37+
use x86::i386::*;
3838

3939
#[test]
4040
fn test_eflags() {

src/x86/i386/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
//! `i386` intrinsics
2+
3+
mod eflags;
4+
pub use self::eflags::*;

src/x86/abm.rs renamed to src/x86/i586/abm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ pub unsafe fn _popcnt64(x: u64) -> u64 {
6161
mod tests {
6262
use stdsimd_test::simd_test;
6363

64-
use x86::abm;
64+
use x86::i586::abm;
6565

6666
#[simd_test = "lzcnt"]
6767
unsafe fn _lzcnt_u32() {

src/x86/avx.rs renamed to src/x86/i586/avx.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,7 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
986986
#[target_feature = "+avx,+sse"]
987987
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
988988
pub unsafe fn _mm_permute_ps(a: f32x4, imm8: i32) -> f32x4 {
989-
use x86::sse::_mm_undefined_ps;
989+
use x86::i586::sse::_mm_undefined_ps;
990990

991991
let imm8 = (imm8 & 0xFF) as u8;
992992
macro_rules! shuffle4 {
@@ -1100,7 +1100,7 @@ pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
11001100
#[target_feature = "+avx,+sse2"]
11011101
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
11021102
pub unsafe fn _mm_permute_pd(a: f64x2, imm8: i32) -> f64x2 {
1103-
use x86::sse2::_mm_undefined_pd;
1103+
use x86::i586::sse2::_mm_undefined_pd;
11041104

11051105
let imm8 = (imm8 & 0xFF) as u8;
11061106
macro_rules! shuffle2 {
@@ -2159,7 +2159,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
21592159
#[inline(always)]
21602160
#[target_feature = "+avx,+sse"]
21612161
pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
2162-
use x86::sse::_mm_setzero_ps;
2162+
use x86::i586::sse::_mm_setzero_ps;
21632163
simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
21642164
}
21652165

@@ -2169,7 +2169,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
21692169
#[inline(always)]
21702170
#[target_feature = "+avx,+sse2"]
21712171
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2172-
use x86::sse2::_mm_setzero_si128;
2172+
use x86::i586::sse2::_mm_setzero_si128;
21732173
let b = mem::transmute(_mm_setzero_si128());
21742174
let dst: i64x4 = simd_shuffle4(i64x2::from(a), b, [0, 1, 2, 3]);
21752175
__m256i::from(dst)
@@ -2182,7 +2182,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
21822182
#[inline(always)]
21832183
#[target_feature = "+avx,+sse2"]
21842184
pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
2185-
use x86::sse2::_mm_setzero_pd;
2185+
use x86::i586::sse2::_mm_setzero_pd;
21862186
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
21872187
}
21882188

@@ -2268,7 +2268,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
22682268
pub unsafe fn _mm256_loadu2_m128(
22692269
hiaddr: *const f32, loaddr: *const f32
22702270
) -> f32x8 {
2271-
use x86::sse::_mm_loadu_ps;
2271+
use x86::i586::sse::_mm_loadu_ps;
22722272
let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
22732273
_mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
22742274
}
@@ -2282,7 +2282,7 @@ pub unsafe fn _mm256_loadu2_m128(
22822282
pub unsafe fn _mm256_loadu2_m128d(
22832283
hiaddr: *const f64, loaddr: *const f64
22842284
) -> f64x4 {
2285-
use x86::sse2::_mm_loadu_pd;
2285+
use x86::i586::sse2::_mm_loadu_pd;
22862286
let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
22872287
_mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
22882288
}
@@ -2295,7 +2295,7 @@ pub unsafe fn _mm256_loadu2_m128d(
22952295
pub unsafe fn _mm256_loadu2_m128i(
22962296
hiaddr: *const __m128i, loaddr: *const __m128i
22972297
) -> __m256i {
2298-
use x86::sse2::_mm_loadu_si128;
2298+
use x86::i586::sse2::_mm_loadu_si128;
22992299
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
23002300
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
23012301
}
@@ -2309,7 +2309,7 @@ pub unsafe fn _mm256_loadu2_m128i(
23092309
pub unsafe fn _mm256_storeu2_m128(
23102310
hiaddr: *mut f32, loaddr: *mut f32, a: f32x8
23112311
) {
2312-
use x86::sse::_mm_storeu_ps;
2312+
use x86::i586::sse::_mm_storeu_ps;
23132313
let lo = _mm256_castps256_ps128(a);
23142314
_mm_storeu_ps(loaddr, lo);
23152315
let hi = _mm256_extractf128_ps(a, 1);
@@ -2325,7 +2325,7 @@ pub unsafe fn _mm256_storeu2_m128(
23252325
pub unsafe fn _mm256_storeu2_m128d(
23262326
hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
23272327
) {
2328-
use x86::sse2::_mm_storeu_pd;
2328+
use x86::i586::sse2::_mm_storeu_pd;
23292329
let lo = _mm256_castpd256_pd128(a);
23302330
_mm_storeu_pd(loaddr, lo);
23312331
let hi = _mm256_extractf128_pd(a, 1);
@@ -2340,7 +2340,7 @@ pub unsafe fn _mm256_storeu2_m128d(
23402340
pub unsafe fn _mm256_storeu2_m128i(
23412341
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
23422342
) {
2343-
use x86::sse2::_mm_storeu_si128;
2343+
use x86::i586::sse2::_mm_storeu_si128;
23442344
let lo = _mm256_castsi256_si128(a);
23452345
_mm_storeu_si128(loaddr, lo);
23462346
let hi = _mm256_extractf128_si256(a, 1);
@@ -2501,7 +2501,7 @@ mod tests {
25012501

25022502
use v128::{f32x4, f64x2, i32x4, i64x2, i8x16};
25032503
use v256::*;
2504-
use x86::avx;
2504+
use x86::i586::avx;
25052505
use x86::{__m128i, __m256i};
25062506

25072507
#[simd_test = "avx"]
@@ -4173,7 +4173,7 @@ mod tests {
41734173

41744174
#[simd_test = "avx"]
41754175
unsafe fn _mm256_storeu2_m128() {
4176-
use x86::sse::_mm_undefined_ps;
4176+
use x86::i586::sse::_mm_undefined_ps;
41774177
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
41784178
let mut hi = _mm_undefined_ps();
41794179
let mut lo = _mm_undefined_ps();
@@ -4188,7 +4188,7 @@ mod tests {
41884188

41894189
#[simd_test = "avx"]
41904190
unsafe fn _mm256_storeu2_m128d() {
4191-
use x86::sse2::_mm_undefined_pd;
4191+
use x86::i586::sse2::_mm_undefined_pd;
41924192
let a = f64x4::new(1., 2., 3., 4.);
41934193
let mut hi = _mm_undefined_pd();
41944194
let mut lo = _mm_undefined_pd();
@@ -4203,7 +4203,7 @@ mod tests {
42034203

42044204
#[simd_test = "avx"]
42054205
unsafe fn _mm256_storeu2_m128i() {
4206-
use x86::sse2::_mm_undefined_si128;
4206+
use x86::i586::sse2::_mm_undefined_si128;
42074207
#[cfg_attr(rustfmt, rustfmt_skip)]
42084208
let a = i8x32::new(
42094209
1, 2, 3, 4, 5, 6, 7, 8,

src/x86/avx2.rs renamed to src/x86/i586/avx2.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2217,7 +2217,7 @@ mod tests {
22172217

22182218
use v256::*;
22192219
use v128::*;
2220-
use x86::avx2;
2220+
use x86::i586::avx2;
22212221
use x86::__m256i;
22222222
use std;
22232223

src/x86/bmi.rs renamed to src/x86/i586/bmi.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ extern "C" {
192192
mod tests {
193193
use stdsimd_test::simd_test;
194194

195-
use x86::bmi;
195+
use x86::i586::bmi;
196196

197197
#[simd_test = "bmi"]
198198
unsafe fn _bextr_u32() {

src/x86/bmi2.rs renamed to src/x86/i586/bmi2.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ extern "C" {
118118
mod tests {
119119
use stdsimd_test::simd_test;
120120

121-
use x86::bmi2;
121+
use x86::i586::bmi2;
122122

123123
#[simd_test = "bmi2"]
124124
unsafe fn _pext_u32() {

src/x86/cpuid.rs renamed to src/x86/i586/cpuid.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ pub fn has_cpuid() -> bool {
7777
}
7878
#[cfg(target_arch = "x86")]
7979
{
80-
use super::ia32::{__readeflags, __writeeflags};
80+
use x86::i386::{__readeflags, __writeeflags};
8181

8282
// On `x86` the `cpuid` instruction is not always available.
8383
// This follows the approach indicated in:
@@ -119,23 +119,23 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
119119

120120
#[cfg(test)]
121121
mod tests {
122-
use super::*;
122+
use x86::i586::cpuid;
123123

124124
#[test]
125125
fn test_always_has_cpuid() {
126126
// all currently-tested targets have the instruction
127127
// FIXME: add targets without `cpuid` to CI
128-
assert!(has_cpuid());
128+
assert!(cpuid::has_cpuid());
129129
}
130130

131131
#[cfg(target_arch = "x86")]
132132
#[test]
133133
fn test_has_cpuid() {
134-
use vendor::__readeflags;
134+
use x86::i386::__readeflags;
135135
unsafe {
136136
let before = __readeflags();
137137

138-
if has_cpuid() {
138+
if cpuid::has_cpuid() {
139139
assert!(before != __readeflags());
140140
} else {
141141
assert!(before == __readeflags());

src/x86/i586/mod.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//! `i586` intrinsics
2+
3+
pub use self::cpuid::*;
4+
pub use self::xsave::*;
5+
6+
pub use self::sse::*;
7+
pub use self::sse2::*;
8+
pub use self::sse3::*;
9+
pub use self::ssse3::*;
10+
pub use self::sse41::*;
11+
pub use self::sse42::*;
12+
pub use self::avx::*;
13+
pub use self::avx2::*;
14+
15+
pub use self::abm::*;
16+
pub use self::bmi::*;
17+
pub use self::bmi2::*;
18+
19+
#[cfg(not(feature = "intel_sde"))]
20+
pub use self::tbm::*;
21+
22+
mod cpuid;
23+
mod xsave;
24+
25+
mod sse;
26+
mod sse2;
27+
mod sse3;
28+
mod ssse3;
29+
mod sse41;
30+
mod sse42;
31+
mod avx;
32+
mod avx2;
33+
34+
mod abm;
35+
mod bmi;
36+
mod bmi2;
37+
38+
#[cfg(not(feature = "intel_sde"))]
39+
mod tbm;

0 commit comments

Comments
 (0)