diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs index c965375a2f..a2ea361d76 100644 --- a/coresimd/src/x86/i586/avx2.rs +++ b/coresimd/src/x86/i586/avx2.rs @@ -3162,8 +3162,6 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use v256::*; - use v128::*; use x86::*; use std; @@ -3423,17 +3421,16 @@ mod tests { unsafe fn test_mm256_and_si256() { let a = _mm256_set1_epi8(5); let b = _mm256_set1_epi8(3); - let got = _mm256_and_si256(__m256i::from(a), __m256i::from(b)); - assert_eq!(got, __m256i::from(_mm256_set1_epi8(1))); + let got = _mm256_and_si256(a, b); + assert_eq!(got, _mm256_set1_epi8(1)); } #[simd_test = "avx2"] unsafe fn test_mm256_andnot_si256() { let a = _mm256_set1_epi8(5); let b = _mm256_set1_epi8(3); - let got = - _mm256_andnot_si256(__m256i::from(a), __m256i::from(b)); - assert_eq!(got, __m256i::from(_mm256_set1_epi8(2))); + let got = _mm256_andnot_si256(a, b); + assert_eq!(got, _mm256_set1_epi8(2)); } #[simd_test = "avx2"] @@ -3774,9 +3771,9 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_extracti128_si256() { - let a = __m256i::from(_mm256_setr_epi64x(1, 2, 3, 4)); + let a = _mm256_setr_epi64x(1, 2, 3, 4); let r = _mm256_extracti128_si256(a, 0b01); - let e = __m128i::from(_mm_setr_epi64x(3, 4)); + let e = _mm_setr_epi64x(3, 4); assert_eq!(r, e); } @@ -3850,11 +3847,11 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_inserti128_si256() { - let a = __m256i::from(_mm256_setr_epi64x(1, 2, 3, 4)); - let b = __m128i::from(_mm_setr_epi64x(7, 8)); + let a = _mm256_setr_epi64x(1, 2, 3, 4); + let b = _mm_setr_epi64x(7, 8); let r = _mm256_inserti128_si256(a, b, 0b01); let e = _mm256_setr_epi64x(1, 2, 7, 8); - assert_eq!(r, __m256i::from(e)); + assert_eq!(r, e); } #[simd_test = "avx2"] @@ -4124,8 +4121,8 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_or_si256() { - let a = __m256i::from(_mm256_set1_epi8(-1)); - let b = __m256i::from(_mm256_set1_epi8(0)); + let a = _mm256_set1_epi8(-1); + let b = _mm256_set1_epi8(0); let r = _mm256_or_si256(a, b); assert_eq!(r, a); } @@ -4301,8 +4298,8 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_slli_si256() { let a = _mm256_set1_epi64x(0xFFFFFFFF); - let r = _mm256_slli_si256(__m256i::from(a), 3); - assert_eq!(r, __m256i::from(_mm256_set1_epi64x(0xFFFFFFFF000000))); + let r = _mm256_slli_si256(a, 3); + assert_eq!(r, _mm256_set1_epi64x(0xFFFFFFFF000000)); } #[simd_test = "avx2"] @@ -4400,7 +4397,7 @@ mod tests { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); - let r = _mm256_srli_si256(__m256i::from(a), 3); + let r = _mm256_srli_si256(a, 3); #[cfg_attr(rustfmt, rustfmt_skip)] let e = _mm256_setr_epi8( 4, 5, 6, 7, 8, 9, 10, 11, @@ -4408,7 +4405,7 @@ mod tests { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, ); - assert_eq!(r, __m256i::from(e)); + assert_eq!(r, e); } #[simd_test = "avx2"] @@ -4561,10 +4558,10 @@ mod tests { #[simd_test = "avx2"] unsafe fn test_mm256_xor_si256() { - let a = __m256i::from(_mm256_set1_epi8(5)); - let b = __m256i::from(_mm256_set1_epi8(3)); + let a = _mm256_set1_epi8(5); + let b = _mm256_set1_epi8(3); let r = _mm256_xor_si256(a, b); - assert_eq!(r, __m256i::from(_mm256_set1_epi8(6))); + assert_eq!(r, _mm256_set1_epi8(6)); } #[simd_test = "avx2"] diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs index 79f4066340..e0ed08fe64 100644 --- a/coresimd/src/x86/i686/sse4a.rs +++ b/coresimd/src/x86/i686/sse4a.rs @@ -2,6 +2,7 @@ use core::mem; use v128::*; +use x86::*; #[cfg(test)] use stdsimd_test::assert_instr; @@ -13,9 +14,9 @@ extern "C" { #[link_name = "llvm.x86.sse4a.insertq"] fn insertq(x: i64x2, y: i64x2) -> i64x2; #[link_name = "llvm.x86.sse4a.movnt.sd"] - fn movntsd(x: *mut f64, y: f64x2); + fn movntsd(x: *mut f64, y: __m128d); #[link_name = "llvm.x86.sse4a.movnt.ss"] - fn movntss(x: *mut f32, y: f32x4); + fn movntss(x: *mut f32, y: __m128); } // FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ @@ -35,8 +36,8 @@ extern "C" { #[inline(always)] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(extrq))] -pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 { - extrq(x, mem::transmute(y)) +pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { + mem::transmute(extrq(x.as_i64x2(), y.as_i8x16())) } /// Inserts the `[length:0]` bits of `y` into `x` at `index`. @@ -51,15 +52,15 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 { #[inline(always)] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(insertq))] -pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 { - insertq(x, y) +pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { + mem::transmute(insertq(x.as_i64x2(), y.as_i64x2())) } /// Non-temporal store of `a.0` into `p`. #[inline(always)] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(movntsd))] -pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) { +pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { movntsd(p, a); } @@ -67,43 +68,42 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) { #[inline(always)] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(movntss))] -pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) { +pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { movntss(p, a); } #[cfg(test)] mod tests { use stdsimd_test::simd_test; - use x86::i686::sse4a; - use v128::*; + use x86::*; #[simd_test = "sse4a"] - unsafe fn _mm_extract_si64() { + unsafe fn test_mm_extract_si64() { let b = 0b0110_0000_0000_i64; // ^^^^ bit range extracted - let x = i64x2::new(b, 0); + let x = _mm_setr_epi64x(b, 0); let v = 0b001000___00___000100_i64; // ^idx: 2^3 = 8 ^length = 2^2 = 4 - let y = i64x2::new(v, 0); - let e = i64x2::new(0b0110_i64, 0); - let r = sse4a::_mm_extract_si64(x, y); + let y = _mm_setr_epi64x(v, 0); + let e = _mm_setr_epi64x(0b0110_i64, 0); + let r = _mm_extract_si64(x, y); assert_eq!(r, e); } #[simd_test = "sse4a"] - unsafe fn _mm_insert_si64() { + unsafe fn test_mm_insert_si64() { let i = 0b0110_i64; // ^^^^ bit range inserted let z = 0b1010_1010_1010i64; // ^^^^ bit range replaced let e = 0b0110_1010_1010i64; // ^^^^ replaced 1010 with 0110 - let x = i64x2::new(z, 0); - let expected = i64x2::new(e, 0); + let x = _mm_setr_epi64x(z, 0); + let expected = _mm_setr_epi64x(e, 0); let v = 0b001000___00___000100_i64; // ^idx: 2^3 = 8 ^length = 2^2 = 4 - let y = i64x2::new(i, v); - let r = sse4a::_mm_insert_si64(x, y); + let y = _mm_setr_epi64x(i, v); + let r = _mm_insert_si64(x, y); assert_eq!(r, expected); } @@ -113,7 +113,7 @@ mod tests { } #[simd_test = "sse4a"] - unsafe fn _mm_stream_sd() { + unsafe fn test_mm_stream_sd() { let mut mem = MemoryF64 { data: [1.0_f64, 2.0], }; @@ -121,9 +121,9 @@ mod tests { let vals = &mut mem.data; let d = vals.as_mut_ptr(); - let x = f64x2::new(3.0, 4.0); + let x = _mm_setr_pd(3.0, 4.0); - sse4a::_mm_stream_sd(d, x); + _mm_stream_sd(d, x); } assert_eq!(mem.data[0], 3.0); assert_eq!(mem.data[1], 2.0); @@ -135,7 +135,7 @@ mod tests { } #[simd_test = "sse4a"] - unsafe fn _mm_stream_ss() { + unsafe fn test_mm_stream_ss() { let mut mem = MemoryF32 { data: [1.0_f32, 2.0, 3.0, 4.0], }; @@ -143,9 +143,9 @@ mod tests { let vals = &mut mem.data; let d = vals.as_mut_ptr(); - let x = f32x4::new(5.0, 6.0, 7.0, 8.0); + let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); - sse4a::_mm_stream_ss(d, x); + _mm_stream_ss(d, x); } assert_eq!(mem.data[0], 5.0); assert_eq!(mem.data[1], 2.0); diff --git a/coresimd/src/x86/test.rs b/coresimd/src/x86/test.rs index ad36b813a0..94cd3860dc 100644 --- a/coresimd/src/x86/test.rs +++ b/coresimd/src/x86/test.rs @@ -63,3 +63,14 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 { union A { a: __m256, b: [f32; 8] }; A { a }.b[idx] } + +// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r which +// doesn't exist on x86! +#[cfg(target_arch = "x86")] +#[target_feature(enable = "avx")] +pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i { + union A { a: __m128i, b: [i64; 2] }; + let mut a = A { a }; + a.b[idx as usize] = val; + a.a +} diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs index 5fe888c24c..686bd4737b 100644 --- a/stdsimd-verify/src/lib.rs +++ b/stdsimd-verify/src/lib.rs @@ -25,6 +25,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream { let mut files = Vec::new(); walk(&root, &mut files); + assert!(files.len() > 0); let mut functions = Vec::new(); for file in files { @@ -35,6 +36,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream { } } } + assert!(functions.len() > 0); functions.retain(|f| { match f.vis { @@ -48,10 +50,11 @@ pub fn x86_functions(input: TokenStream) -> TokenStream { .iter() .filter_map(|a| a.interpret_meta()) .any(|a| match a { - syn::Meta::NameValue(i) => i.ident == "target_feature", + syn::Meta::List(i) => i.ident == "target_feature", _ => false, }) }); + assert!(functions.len() > 0); let input = proc_macro2::TokenStream::from(input); @@ -97,48 +100,24 @@ pub fn x86_functions(input: TokenStream) -> TokenStream { fn to_type(t: &syn::Type) -> Tokens { match *t { syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() { - "__m128" => my_quote! { &F32x4 }, - "__m128d" => my_quote! { &F64x2 }, - "__m128i" => my_quote! { &I8x16 }, - "__m256i" => my_quote! { &I8x32 }, - "__m64" => my_quote! { &I8x8 }, + "__m128" => my_quote! { &M128 }, + "__m128d" => my_quote! { &M128D }, + "__m128i" => my_quote! { &M128I }, + "__m256" => my_quote! { &M256 }, + "__m256d" => my_quote! { &M256D }, + "__m256i" => my_quote! { &M256I }, + "__m64" => my_quote! { &M64 }, "bool" => my_quote! { &BOOL }, "f32" => my_quote! { &F32 }, - "f32x4" => my_quote! { &F32x4 }, - "f32x8" => my_quote! { &F32x8 }, "f64" => my_quote! { &F64 }, - "f64x2" => my_quote! { &F64x2 }, - "f64x4" => my_quote! { &F64x4 }, "i16" => my_quote! { &I16 }, - "i16x16" => my_quote! { &I16x16 }, - "i16x4" => my_quote! { &I16x4 }, - "i16x8" => my_quote! { &I16x8 }, "i32" => my_quote! { &I32 }, - "i32x2" => my_quote! { &I32x2 }, - "i32x4" => my_quote! { &I32x4 }, - "i32x8" => my_quote! { &I32x8 }, "i64" => my_quote! { &I64 }, - "i64x2" => my_quote! { &I64x2 }, - "i64x4" => my_quote! { &I64x4 }, "i8" => my_quote! { &I8 }, - "i8x16" => my_quote! { &I8x16 }, - "i8x32" => my_quote! { &I8x32 }, - "i8x8" => my_quote! { &I8x8 }, - "u16x4" => my_quote! { &U16x4 }, - "u16x8" => my_quote! { &U16x8 }, + "u16" => my_quote! { &U16 }, "u32" => my_quote! { &U32 }, - "u32x2" => my_quote! { &U32x2 }, - "u32x4" => my_quote! { &U32x4 }, - "u32x8" => my_quote! { &U32x8 }, "u64" => my_quote! { &U64 }, - "u64x2" => my_quote! { &U64x2 }, - "u64x4" => my_quote! { &U64x4 }, "u8" => my_quote! { &U8 }, - "u16" => my_quote! { &U16 }, - "u8x16" => my_quote! { &U8x16 }, - "u8x32" => my_quote! { &U8x32 }, - "u16x16" => my_quote! { &U16x16 }, - "u8x8" => my_quote! { &U8x8 }, s => panic!("unspported type: {}", s), }, syn::Type::Ptr(syn::TypePtr { ref elem, .. }) @@ -233,15 +212,34 @@ fn find_target_feature( .iter() .filter_map(|a| a.interpret_meta()) .filter_map(|a| match a { - syn::Meta::NameValue(i) => { + syn::Meta::List(i) => { if i.ident == "target_feature" { - Some(i.lit) + Some(i.nested) } else { None } } _ => None, }) + .flat_map(|list| list) + .filter_map(|nested| { + match nested { + syn::NestedMeta::Meta(m) => Some(m), + syn::NestedMeta::Literal(_) => None, + } + }) + .filter_map(|m| { + match m { + syn::Meta::NameValue(i) => { + if i.ident == "enable" { + Some(i.lit) + } else { + None + } + } + _ => None, + } + }) .next() .expect(&format!("failed to find target_feature for {}", name)) } diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs index 87c1a1fc87..c4ac03543d 100644 --- a/stdsimd-verify/tests/x86-intel.rs +++ b/stdsimd-verify/tests/x86-intel.rs @@ -22,51 +22,37 @@ struct Function { static BOOL: Type = Type::Bool; static F32: Type = Type::PrimFloat(32); -static F32x4: Type = Type::Float(32, 4); -static F32x8: Type = Type::Float(32, 8); static F64: Type = Type::PrimFloat(64); -static F64x2: Type = Type::Float(64, 2); -static F64x4: Type = Type::Float(64, 4); static I16: Type = Type::PrimSigned(16); -static I16x16: Type = Type::Signed(16, 16); -static I16x4: Type = Type::Signed(16, 4); -static I16x8: Type = Type::Signed(16, 8); static I32: Type = Type::PrimSigned(32); -static I32x2: Type = Type::Signed(32, 2); -static I32x4: Type = Type::Signed(32, 4); -static I32x8: Type = Type::Signed(32, 8); static I64: Type = Type::PrimSigned(64); -static I64x2: Type = Type::Signed(64, 2); -static I64x4: Type = Type::Signed(64, 4); static I8: Type = Type::PrimSigned(8); -static I8x16: Type = Type::Signed(8, 16); -static I8x32: Type = Type::Signed(8, 32); -static I8x8: Type = Type::Signed(8, 8); static U16: Type = Type::PrimUnsigned(16); -static U16x16: Type = Type::Unsigned(16, 16); -// static U16x4: Type = Type::Unsigned(16, 4); -static U16x8: Type = Type::Unsigned(16, 8); static U32: Type = Type::PrimUnsigned(32); -static U32x2: Type = Type::Unsigned(32, 2); -static U32x4: Type = Type::Unsigned(32, 4); -static U32x8: Type = Type::Unsigned(32, 8); static U64: Type = Type::PrimUnsigned(64); -static U64x2: Type = Type::Unsigned(64, 2); -static U64x4: Type = Type::Unsigned(64, 4); static U8: Type = Type::PrimUnsigned(8); -static U8x16: Type = Type::Unsigned(8, 16); -static U8x32: Type = Type::Unsigned(8, 32); -// static U8x8: Type = Type::Unsigned(8, 8); + +static M64: Type = Type::M64; +static M128: Type = Type::M128; +static M128I: Type = Type::M128I; +static M128D: Type = Type::M128D; +static M256: Type = Type::M256; +static M256I: Type = Type::M256I; +static M256D: Type = Type::M256D; #[derive(Debug)] enum Type { - Float(u8, u8), PrimFloat(u8), PrimSigned(u8), PrimUnsigned(u8), Ptr(&'static Type), - Signed(u8, u8), - Unsigned(u8, u8), + M64, + M128, + M128D, + M128I, + M256, + M256D, + M256I, Bool, } @@ -271,33 +257,22 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) { (&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {} (&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {} - (&Type::Signed(a, b), "__m128i") - | (&Type::Unsigned(a, b), "__m128i") - | (&Type::Ptr(&Type::Signed(a, b)), "__m128i*") - | (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {} - - (&Type::Signed(a, b), "__m256i") - | (&Type::Unsigned(a, b), "__m256i") - | (&Type::Ptr(&Type::Signed(a, b)), "__m256i*") - | (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*") - if (a as u32) * (b as u32) == 256 => {} - - (&Type::Signed(a, b), "__m64") - | (&Type::Unsigned(a, b), "__m64") - | (&Type::Ptr(&Type::Signed(a, b)), "__m64*") - | (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {} - - (&Type::Float(32, 4), "__m128") => {} - (&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {} - - (&Type::Float(64, 2), "__m128d") => {} - (&Type::Ptr(&Type::Float(64, 2)), "__m128d*") => {} + (&Type::M64, "__m64") + | (&Type::Ptr(&Type::M64), "__m64*") => {} - (&Type::Float(32, 8), "__m256") => {} - (&Type::Ptr(&Type::Float(32, 8)), "__m256*") => {} + (&Type::M128I, "__m128i") + | (&Type::Ptr(&Type::M128I), "__m128i*") + | (&Type::M128D, "__m128d") + | (&Type::Ptr(&Type::M128D), "__m128d*") + | (&Type::M128, "__m128") + | (&Type::Ptr(&Type::M128), "__m128*") => {} - (&Type::Float(64, 4), "__m256d") => {} - (&Type::Ptr(&Type::Float(64, 4)), "__m256d*") => {} + (&Type::M256I, "__m256i") + | (&Type::Ptr(&Type::M256I), "__m256i*") + | (&Type::M256D, "__m256d") + | (&Type::Ptr(&Type::M256D), "__m256d*") + | (&Type::M256, "__m256") + | (&Type::Ptr(&Type::M256), "__m256*") => {} // These two intrinsics return a 16-bit element but in Intel's // intrinsics they're listed as returning an `int`. @@ -311,7 +286,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) { // This is a macro (?) in C which seems to mutate its arguments, but // that means that we're taking pointers to arguments in rust // as we're not exposing it as a macro. - (&Type::Ptr(&Type::Float(32, 4)), "__m128") + (&Type::Ptr(&Type::M128), "__m128") if intrinsic == "_MM_TRANSPOSE4_PS" => {} // These intrinsics return an `int` in C but they're always either the