Skip to content

add mmx module, mmx run-time detection, intrinsics #220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions coresimd/src/runtime/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ use super::bit;
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("mmx") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::mmx{}) };
("sse") => {
$crate::vendor::__unstable_detect_feature(
$crate::vendor::__Feature::sse{}) };
Expand Down Expand Up @@ -165,6 +168,8 @@ macro_rules! __unstable_detect_feature {
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// MMX
mmx,
/// SSE (Streaming SIMD Extensions)
sse,
/// SSE2 (Streaming SIMD Extensions 2)
Expand Down Expand Up @@ -332,6 +337,7 @@ pub fn detect_features() -> usize {
enable(proc_info_ecx, 20, __Feature::sse4_2);
enable(proc_info_ecx, 23, __Feature::popcnt);
enable(proc_info_edx, 24, __Feature::fxsr);
enable(proc_info_edx, 23, __Feature::mmx);
enable(proc_info_edx, 25, __Feature::sse);
enable(proc_info_edx, 26, __Feature::sse2);

Expand Down
8 changes: 0 additions & 8 deletions coresimd/src/x86/i586/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -626,10 +626,6 @@ pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
_mm_cvtss_si32(a)
}

// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
// pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2
// pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { _mm_cvtps_pi32(a) }

/// Convert the lowest 32 bit float in the input vector to a 32 bit integer
/// with
/// truncation.
Expand All @@ -655,10 +651,6 @@ pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
_mm_cvttss_si32(a)
}

// Blocked by https://github.com/rust-lang-nursery/stdsimd/issues/74
// pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2;
// pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { _mm_cvttps_pi32(a) }

/// Extract the lowest 32 bit float from the input vector.
#[inline(always)]
#[target_feature = "+sse"]
Expand Down
88 changes: 88 additions & 0 deletions coresimd/src/x86/i686/mmx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
//! `i686` MMX instruction set.
//!
//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
//! header.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf

use v64::{i16x4, i32x2, i8x8};
use x86::__m64;
use core::mem;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Constructs a 64-bit integer vector initialized to zero.
///
/// The result is produced by reinterpreting the 64-bit integer `0` as an
/// `__m64`, so every bit of the returned vector is clear.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
// FIXME: this produces a movl instead of xorps on x86
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
// The instruction check below is therefore only enabled on x86_64, where it
// asserts the `xor` that is currently emitted.
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
pub unsafe fn _mm_setzero_si64() -> __m64 {
// Bit-for-bit reinterpretation of an all-zero `i64`.
mem::transmute(0_i64)
}

/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
}

/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
}

// Bindings to the LLVM MMX pack intrinsics used by this module. They operate
// on `__m64`, the crate's stand-in for LLVM's `x86_mmx` type, which is not a
// C-compatible type -- hence the `improper_ctypes` allowance.
#[allow(improper_ctypes)]
extern "C" {
// Pack signed 16-bit lanes into signed 8-bit lanes with saturation.
#[link_name = "llvm.x86.mmx.packsswb"]
fn packsswb(a: __m64, b: __m64) -> __m64;
// Pack signed 32-bit lanes into signed 16-bit lanes with saturation.
#[link_name = "llvm.x86.mmx.packssdw"]
fn packssdw(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
mod tests {
    use v64::{i16x4, i32x2, i8x8};
    use x86::i686::mmx;
    use x86::__m64;
    use stdsimd_test::simd_test;

    /// The zero vector must compare equal to a reinterpreted `0_i64`.
    #[simd_test = "sse"] // FIXME: should be mmx
    unsafe fn _mm_setzero_si64() {
        let r: __m64 = ::std::mem::transmute(0_i64);
        assert_eq!(r, mmx::_mm_setzero_si64());
    }

    /// In-range lanes pass through unchanged; out-of-range lanes clamp to
    /// the `i8` limits.
    #[simd_test = "sse"] // FIXME: should be mmx
    unsafe fn _mm_packs_pi16() {
        let a = i16x4::new(-1, 2, -3, 4);
        let b = i16x4::new(-5, 6, -7, 8);
        let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
        assert_eq!(r, mmx::_mm_packs_pi16(a, b));

        // Signed saturation: just past the limits and far past the limits.
        let a = i16x4::new(128, -129, 32767, -32768);
        let b = i16x4::new(127, -128, 300, -300);
        let r = i8x8::new(127, -128, 127, -128, 127, -128, 127, -128);
        assert_eq!(r, mmx::_mm_packs_pi16(a, b));
    }

    /// In-range lanes pass through unchanged; out-of-range lanes clamp to
    /// the `i16` limits.
    #[simd_test = "sse"] // FIXME: should be mmx
    unsafe fn _mm_packs_pi32() {
        let a = i32x2::new(-1, 2);
        let b = i32x2::new(-5, 6);
        let r = i16x4::new(-1, 2, -5, 6);
        assert_eq!(r, mmx::_mm_packs_pi32(a, b));

        // Signed saturation: just past the limits and far past the limits.
        let a = i32x2::new(32768, -32769);
        let b = i32x2::new(100_000, -100_000);
        let r = i16x4::new(32767, -32768, 32767, -32768);
        assert_eq!(r, mmx::_mm_packs_pi32(a, b));
    }
}
3 changes: 3 additions & 0 deletions coresimd/src/x86/i686/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
//! `i686` intrinsics

mod mmx;
pub use self::mmx::*;

mod sse;
pub use self::sse::*;

Expand Down
111 changes: 103 additions & 8 deletions coresimd/src/x86/i686/sse.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
//! `i686` Streaming SIMD Extensions (SSE)

use v64::{i16x4, u8x8};
use v128::f32x4;
use v64::{i16x4, i32x2, i8x8, u8x8};
use x86::__m64;
use core::mem;
use x86::i586;
use x86::i686::mmx;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// This type is only required for mapping vector types to llvm's `x86_mmx`
/// type.
#[allow(non_camel_case_types)]
#[repr(simd)]
struct __m64(i64);

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.pmaxs.w"]
Expand All @@ -22,6 +20,10 @@ extern "C" {
fn pminsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pminu.b"]
fn pminub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtps2pi"]
fn cvtps2pi(a: f32x4) -> __m64;
#[link_name = "llvm.x86.sse.cvttps2pi"]
fn cvttps2pi(a: f32x4) -> __m64;
}

/// Compares the packed 16-bit signed integers of `a` and `b` writing the
Expand Down Expand Up @@ -96,9 +98,70 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
_mm_min_pu8(a, b)
}

/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
mem::transmute(cvttps2pi(a))
}

/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
///
/// This is an alternative name that forwards directly to
/// `_mm_cvttps_pi32`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
_mm_cvttps_pi32(a)
}

/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
mem::transmute(cvtps2pi(a))
}

/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
///
/// This is an alternative name that forwards directly to `_mm_cvtps_pi32`.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
_mm_cvtps_pi32(a)
}

/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 16-bit integers.
///
/// The four floats are converted to 32-bit integers two at a time and then
/// narrowed to 16 bits by `_mm_packs_pi32`, i.e. with signed saturation.
#[inline(always)]
// `+mmx` is required as well: the narrowing step calls into the MMX module,
// whose functions are declared with `+mmx,+sse`.
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
    // Lower two floats.
    let lo = _mm_cvtps_pi32(a);
    // Bring the upper two floats down into the lower lanes so the same
    // two-lane conversion can be reused for them.
    let upper = i586::_mm_movehl_ps(a, a);
    let hi = _mm_cvtps_pi32(upper);
    // Narrow both 32-bit pairs into a single vector of four 16-bit lanes.
    mmx::_mm_packs_pi32(lo, hi)
}

/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 8-bit integers, and returns theem in the lower 4 elements of the
/// result.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 {
let b = _mm_cvtps_pi16(a);
let c = mmx::_mm_setzero_si64();
mmx::_mm_packs_pi16(b, mem::transmute(c))
}

#[cfg(test)]
mod tests {
use v64::{i16x4, u8x8};
use v128::f32x4;
use v64::{i16x4, i32x2, i8x8, u8x8};
use x86::i686::sse;
use stdsimd_test::simd_test;

Expand Down Expand Up @@ -141,4 +204,36 @@ mod tests {
assert_eq!(r, sse::_mm_min_pu8(a, b));
assert_eq!(r, sse::_m_pminub(a, b));
}

// Checks that the two lowest floats are converted and that the alias
// `_mm_cvt_ps2pi` returns the same result as `_mm_cvtps_pi32`.
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi32() {
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
// Only the first two lanes of `a` are expected in the output.
let r = i32x2::new(1, 2);

assert_eq!(r, sse::_mm_cvtps_pi32(a));
assert_eq!(r, sse::_mm_cvt_ps2pi(a));
}

// Checks the truncating conversion of the two lowest floats, through both
// `_mm_cvttps_pi32` and its alias `_mm_cvtt_ps2pi`.
#[simd_test = "sse"]
unsafe fn _mm_cvttps_pi32() {
    let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
    let r = i32x2::new(7, 2);

    assert_eq!(r, sse::_mm_cvttps_pi32(a));
    assert_eq!(r, sse::_mm_cvtt_ps2pi(a));

    // Fractional inputs distinguish truncation (round toward zero) from
    // any other rounding: 7.9 -> 7 and -2.9 -> -2.
    let a = f32x4::new(7.9, -2.9, 3.0, 4.0);
    let r = i32x2::new(7, -2);

    assert_eq!(r, sse::_mm_cvttps_pi32(a));
    assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
}

// Checks that all four floats are converted, in order, to 16-bit lanes.
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi16() {
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
let r = i16x4::new(7, 2, 3, 4);
assert_eq!(r, sse::_mm_cvtps_pi16(a));
}

// Checks that the four converted values land in the lower four 8-bit lanes
// and that the upper four lanes are zero.
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi8() {
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq!(r, sse::_mm_cvtps_pi8(a));
}
}
5 changes: 5 additions & 0 deletions coresimd/src/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ mod x86_64;
#[cfg(target_arch = "x86_64")]
pub use self::x86_64::*;

/// 64-bit wide integer vector type.
///
/// A single-field `#[repr(simd)]` wrapper around an `i64`, used as the
/// operand and result type of the MMX LLVM intrinsics.
#[allow(non_camel_case_types)]
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct __m64(i64); // corresponds to llvm's `x86_mmx` type
/// 128-bit wide signed integer vector type.
///
/// Currently an alias for `::v128::i8x16`.
#[allow(non_camel_case_types)]
pub type __m128i = ::v128::i8x16;
Expand Down