diff --git a/coresimd/src/runtime/x86.rs b/coresimd/src/runtime/x86.rs
index 51482e4a75..0780210743 100644
--- a/coresimd/src/runtime/x86.rs
+++ b/coresimd/src/runtime/x86.rs
@@ -29,6 +29,9 @@ use super::bit;
 #[macro_export]
 #[doc(hidden)]
 macro_rules! __unstable_detect_feature {
+    ("mmx") => {
+        $crate::vendor::__unstable_detect_feature(
+            $crate::vendor::__Feature::mmx{}) };
     ("sse") => {
         $crate::vendor::__unstable_detect_feature(
             $crate::vendor::__Feature::sse{}) };
@@ -165,6 +168,8 @@
 #[allow(non_camel_case_types)]
 #[repr(u8)]
 pub enum __Feature {
+    /// MMX
+    mmx,
     /// SSE (Streaming SIMD Extensions)
     sse,
     /// SSE2 (Streaming SIMD Extensions 2)
@@ -332,6 +337,7 @@ pub fn detect_features() -> usize {
         enable(proc_info_ecx, 20, __Feature::sse4_2);
         enable(proc_info_ecx, 23, __Feature::popcnt);
         enable(proc_info_edx, 24, __Feature::fxsr);
+        enable(proc_info_edx, 23, __Feature::mmx);
         enable(proc_info_edx, 25, __Feature::sse);
         enable(proc_info_edx, 26, __Feature::sse2);
 
diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs
index 5ed8005b30..dfa2b0c2e8 100644
--- a/coresimd/src/x86/i586/sse.rs
+++ b/coresimd/src/x86/i586/sse.rs
@@ -626,10 +626,6 @@ pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
     _mm_cvtss_si32(a)
 }
 
-// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
-// pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
-// pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }
-
 /// Convert the lowest 32 bit float in the input vector to a 32 bit integer
 /// with
 /// truncation.
@@ -655,10 +651,6 @@ pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
     _mm_cvttss_si32(a)
 }
 
-// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
-// pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
-// pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }
-
 /// Extract the lowest 32 bit float from the input vector.
 #[inline(always)]
 #[target_feature = "+sse"]
diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs
new file mode 100644
index 0000000000..54aa8c1bbc
--- /dev/null
+++ b/coresimd/src/x86/i686/mmx.rs
@@ -0,0 +1,88 @@
+//! `i586` MMX instruction set.
+//!
+//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
+//! header.
+//!
+//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+
+use v64::{i16x4, i32x2, i8x8};
+use x86::__m64;
+use core::mem;
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Constructs a 64-bit integer vector initialized to zero.
+#[inline(always)]
+#[target_feature = "+mmx,+sse"]
+// FIXME: this produces a movl instead of xorps on x86
+// FIXME: this produces a xor intrinsic instead of xorps on x86_64
+#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
+pub unsafe fn _mm_setzero_si64() -> __m64 {
+    mem::transmute(0_i64)
+}
+
+/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// using signed saturation.
+///
+/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
+/// less than 0x80 are saturated to 0x80.
+#[inline(always)]
+#[target_feature = "+mmx,+sse"]
+#[cfg_attr(test, assert_instr(packsswb))]
+pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
+    mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
+}
+
+/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// using signed saturation.
+///
+/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
+/// values less than 0x8000 are saturated to 0x8000.
+#[inline(always)]
+#[target_feature = "+mmx,+sse"]
+#[cfg_attr(test, assert_instr(packssdw))]
+pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
+    mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.mmx.packsswb"]
+    fn packsswb(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.mmx.packssdw"]
+    fn packssdw(a: __m64, b: __m64) -> __m64;
+}
+
+#[cfg(test)]
+mod tests {
+    use v64::{i16x4, i32x2, i8x8};
+    use x86::i686::mmx;
+    use x86::__m64;
+    use stdsimd_test::simd_test;
+
+    #[simd_test = "sse"] // FIXME: should be mmx
+    unsafe fn _mm_setzero_si64() {
+        let r: __m64 = ::std::mem::transmute(0_i64);
+        assert_eq!(r, mmx::_mm_setzero_si64());
+    }
+
+    #[simd_test = "sse"] // FIXME: should be mmx
+    unsafe fn _mm_packs_pi16() {
+        let a = i16x4::new(-1, 2, -3, 4);
+        let b = i16x4::new(-5, 6, -7, 8);
+        let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
+        assert_eq!(r, mmx::_mm_packs_pi16(a, b));
+    }
+
+    #[simd_test = "sse"] // FIXME: should be mmx
+    unsafe fn _mm_packs_pi32() {
+        let a = i32x2::new(-1, 2);
+        let b = i32x2::new(-5, 6);
+        let r = i16x4::new(-1, 2, -5, 6);
+        assert_eq!(r, mmx::_mm_packs_pi32(a, b));
+    }
+}
diff --git a/coresimd/src/x86/i686/mod.rs b/coresimd/src/x86/i686/mod.rs
index 809f091c95..c6e15274b1 100644
--- a/coresimd/src/x86/i686/mod.rs
+++ b/coresimd/src/x86/i686/mod.rs
@@ -1,5 +1,8 @@
 //! `i686` intrinsics
 
+mod mmx;
+pub use self::mmx::*;
+
 mod sse;
 pub use self::sse::*;
 
diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs
index 912f0da764..a328fa688b 100644
--- a/coresimd/src/x86/i686/sse.rs
+++ b/coresimd/src/x86/i686/sse.rs
@@ -1,17 +1,15 @@
 //! `i686` Streaming SIMD Extensions (SSE)
 
-use v64::{i16x4, u8x8};
+use v128::f32x4;
+use v64::{i16x4, i32x2, i8x8, u8x8};
+use x86::__m64;
 use core::mem;
+use x86::i586;
+use x86::i686::mmx;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
 
-/// This type is only required for mapping vector types to llvm's `x86_mmx`
-/// type.
-#[allow(non_camel_case_types)]
-#[repr(simd)]
-struct __m64(i64);
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.mmx.pmaxs.w"]
@@ -22,6 +20,10 @@ extern "C" {
     fn pminsw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.pminu.b"]
     fn pminub(a: __m64, b: __m64) -> __m64;
+    #[link_name = "llvm.x86.sse.cvtps2pi"]
+    fn cvtps2pi(a: f32x4) -> __m64;
+    #[link_name = "llvm.x86.sse.cvttps2pi"]
+    fn cvttps2pi(a: f32x4) -> __m64;
 }
 
 /// Compares the packed 16-bit signed integers of `a` and `b` writing the
@@ -96,9 +98,70 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
     _mm_min_pu8(a, b)
 }
 
+/// Convert the two lower packed single-precision (32-bit) floating-point
+/// elements in `a` to packed 32-bit integers with truncation.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvttps2pi))]
+pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
+    mem::transmute(cvttps2pi(a))
+}
+
+/// Convert the two lower packed single-precision (32-bit) floating-point
+/// elements in `a` to packed 32-bit integers with truncation.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvttps2pi))]
+pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
+    _mm_cvttps_pi32(a)
+}
+
+/// Convert the two lower packed single-precision (32-bit) floating-point
+/// elements in `a` to packed 32-bit integers.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtps2pi))]
+pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
+    mem::transmute(cvtps2pi(a))
+}
+
+/// Convert the two lower packed single-precision (32-bit) floating-point
+/// elements in `a` to packed 32-bit integers.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtps2pi))]
+pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
+    _mm_cvtps_pi32(a)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in `a` to
+/// packed 16-bit integers.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtps2pi))]
+pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
+    let b = _mm_cvtps_pi32(a);
+    let a = i586::_mm_movehl_ps(a, a);
+    let c = _mm_cvtps_pi32(a);
+    mmx::_mm_packs_pi32(b, c)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in `a` to
+/// packed 8-bit integers, and return them in the lower 4 elements of the
+/// result.
+#[inline(always)]
+#[target_feature = "+sse"]
+#[cfg_attr(test, assert_instr(cvtps2pi))]
+pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 {
+    let b = _mm_cvtps_pi16(a);
+    let c = mmx::_mm_setzero_si64();
+    mmx::_mm_packs_pi16(b, mem::transmute(c))
+}
+
 #[cfg(test)]
 mod tests {
-    use v64::{i16x4, u8x8};
+    use v128::f32x4;
+    use v64::{i16x4, i32x2, i8x8, u8x8};
     use x86::i686::sse;
     use stdsimd_test::simd_test;
 
@@ -141,4 +204,36 @@
         assert_eq!(r, sse::_mm_min_pu8(a, b));
         assert_eq!(r, sse::_m_pminub(a, b));
     }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvtps_pi32() {
+        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let r = i32x2::new(1, 2);
+
+        assert_eq!(r, sse::_mm_cvtps_pi32(a));
+        assert_eq!(r, sse::_mm_cvt_ps2pi(a));
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvttps_pi32() {
+        let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
+        let r = i32x2::new(7, 2);
+
+        assert_eq!(r, sse::_mm_cvttps_pi32(a));
+        assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvtps_pi16() {
+        let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
+        let r = i16x4::new(7, 2, 3, 4);
+        assert_eq!(r, sse::_mm_cvtps_pi16(a));
+    }
+
+    #[simd_test = "sse"]
+    unsafe fn _mm_cvtps_pi8() {
+        let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
+        let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
+        assert_eq!(r, sse::_mm_cvtps_pi8(a));
+    }
 }
diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs
index fcbcdead03..79f4a23a2a 100644
--- a/coresimd/src/x86/mod.rs
+++ b/coresimd/src/x86/mod.rs
@@ -26,6 +26,11 @@ mod x86_64;
 #[cfg(target_arch = "x86_64")]
 pub use self::x86_64::*;
 
+/// 64-bit wide integer vector type.
+#[allow(non_camel_case_types)]
+#[repr(simd)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct __m64(i64); // corresponds to llvm's `x86_mmx` type
 /// 128-bit wide signed integer vector type
 #[allow(non_camel_case_types)]
 pub type __m128i = ::v128::i8x16;
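
Note (reviewer aside, not part of the patch): the snippet below is a plain-Rust scalar sketch of the signed-saturation packing that `_mm_packs_pi16`/`_mm_packs_pi32` rely on. The helper name `packs_pi16` and the lane values (including the saturating 400 -> 127 lane) are made up for illustration and do not depend on the stdsimd API.

// Scalar model of MMX signed-saturation packing: each 16-bit lane is clamped
// to the i8 range and the lanes of `a` and `b` are concatenated, mirroring
// what packsswb does for i16x4 -> i8x8.
fn packs_pi16(a: [i16; 4], b: [i16; 4]) -> [i8; 8] {
    let saturate = |x: i16| -> i8 {
        if x > i8::MAX as i16 {
            i8::MAX // values above 0x7F clamp to 0x7F
        } else if x < i8::MIN as i16 {
            i8::MIN // values below -0x80 clamp to 0x80 (-128)
        } else {
            x as i8
        }
    };
    let mut out = [0i8; 8];
    for (i, &x) in a.iter().chain(b.iter()).enumerate() {
        out[i] = saturate(x);
    }
    out
}

fn main() {
    // Same inputs as the _mm_packs_pi16 test above, except one lane (400)
    // overflows i8 and saturates to 127.
    assert_eq!(
        packs_pi16([-1, 2, -3, 400], [-5, 6, -7, 8]),
        [-1, 2, -3, 127, -5, 6, -7, 8]
    );
}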