Skip to content

Commit 844ac9c

Browse files
committed
Update stdsimd-verify for vendor types
This commit provides insurance that intrinsics are only introduced with known canonical types (`__m128i` and such) instead of also allowing `u8x16` for example.
1 parent 3849d63 commit 844ac9c

File tree

4 files changed

+100
-116
lines changed

4 files changed

+100
-116
lines changed

coresimd/src/x86/i686/sse4a.rs

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use core::mem;
44
use v128::*;
5+
use x86::*;
56

67
#[cfg(test)]
78
use stdsimd_test::assert_instr;
@@ -13,9 +14,9 @@ extern "C" {
1314
#[link_name = "llvm.x86.sse4a.insertq"]
1415
fn insertq(x: i64x2, y: i64x2) -> i64x2;
1516
#[link_name = "llvm.x86.sse4a.movnt.sd"]
16-
fn movntsd(x: *mut f64, y: f64x2);
17+
fn movntsd(x: *mut f64, y: __m128d);
1718
#[link_name = "llvm.x86.sse4a.movnt.ss"]
18-
fn movntss(x: *mut f32, y: f32x4);
19+
fn movntss(x: *mut f32, y: __m128);
1920
}
2021

2122
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
@@ -35,8 +36,8 @@ extern "C" {
3536
#[inline(always)]
3637
#[target_feature(enable = "sse4a")]
3738
#[cfg_attr(test, assert_instr(extrq))]
38-
pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
39-
extrq(x, mem::transmute(y))
39+
pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
40+
mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
4041
}
4142

4243
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
@@ -51,59 +52,58 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
5152
#[inline(always)]
5253
#[target_feature(enable = "sse4a")]
5354
#[cfg_attr(test, assert_instr(insertq))]
54-
pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
55-
insertq(x, y)
55+
pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
56+
mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
5657
}
5758

5859
/// Non-temporal store of `a.0` into `p`.
5960
#[inline(always)]
6061
#[target_feature(enable = "sse4a")]
6162
#[cfg_attr(test, assert_instr(movntsd))]
62-
pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
63+
pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
6364
movntsd(p, a);
6465
}
6566

6667
/// Non-temporal store of `a.0` into `p`.
6768
#[inline(always)]
6869
#[target_feature(enable = "sse4a")]
6970
#[cfg_attr(test, assert_instr(movntss))]
70-
pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
71+
pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
7172
movntss(p, a);
7273
}
7374

7475
#[cfg(test)]
7576
mod tests {
7677
use stdsimd_test::simd_test;
77-
use x86::i686::sse4a;
78-
use v128::*;
78+
use x86::*;
7979

8080
#[simd_test = "sse4a"]
81-
unsafe fn _mm_extract_si64() {
81+
unsafe fn test_mm_extract_si64() {
8282
let b = 0b0110_0000_0000_i64;
8383
// ^^^^ bit range extracted
84-
let x = i64x2::new(b, 0);
84+
let x = _mm_setr_epi64x(b, 0);
8585
let v = 0b001000___00___000100_i64;
8686
// ^idx: 2^3 = 8 ^length = 2^2 = 4
87-
let y = i64x2::new(v, 0);
88-
let e = i64x2::new(0b0110_i64, 0);
89-
let r = sse4a::_mm_extract_si64(x, y);
87+
let y = _mm_setr_epi64x(v, 0);
88+
let e = _mm_setr_epi64x(0b0110_i64, 0);
89+
let r = _mm_extract_si64(x, y);
9090
assert_eq!(r, e);
9191
}
9292

9393
#[simd_test = "sse4a"]
94-
unsafe fn _mm_insert_si64() {
94+
unsafe fn test_mm_insert_si64() {
9595
let i = 0b0110_i64;
9696
// ^^^^ bit range inserted
9797
let z = 0b1010_1010_1010i64;
9898
// ^^^^ bit range replaced
9999
let e = 0b0110_1010_1010i64;
100100
// ^^^^ replaced 1010 with 0110
101-
let x = i64x2::new(z, 0);
102-
let expected = i64x2::new(e, 0);
101+
let x = _mm_setr_epi64x(z, 0);
102+
let expected = _mm_setr_epi64x(e, 0);
103103
let v = 0b001000___00___000100_i64;
104104
// ^idx: 2^3 = 8 ^length = 2^2 = 4
105-
let y = i64x2::new(i, v);
106-
let r = sse4a::_mm_insert_si64(x, y);
105+
let y = _mm_setr_epi64x(i, v);
106+
let r = _mm_insert_si64(x, y);
107107
assert_eq!(r, expected);
108108
}
109109

@@ -113,17 +113,17 @@ mod tests {
113113
}
114114

115115
#[simd_test = "sse4a"]
116-
unsafe fn _mm_stream_sd() {
116+
unsafe fn test_mm_stream_sd() {
117117
let mut mem = MemoryF64 {
118118
data: [1.0_f64, 2.0],
119119
};
120120
{
121121
let vals = &mut mem.data;
122122
let d = vals.as_mut_ptr();
123123

124-
let x = f64x2::new(3.0, 4.0);
124+
let x = _mm_setr_pd(3.0, 4.0);
125125

126-
sse4a::_mm_stream_sd(d, x);
126+
_mm_stream_sd(d, x);
127127
}
128128
assert_eq!(mem.data[0], 3.0);
129129
assert_eq!(mem.data[1], 2.0);
@@ -135,17 +135,17 @@ mod tests {
135135
}
136136

137137
#[simd_test = "sse4a"]
138-
unsafe fn _mm_stream_ss() {
138+
unsafe fn test_mm_stream_ss() {
139139
let mut mem = MemoryF32 {
140140
data: [1.0_f32, 2.0, 3.0, 4.0],
141141
};
142142
{
143143
let vals = &mut mem.data;
144144
let d = vals.as_mut_ptr();
145145

146-
let x = f32x4::new(5.0, 6.0, 7.0, 8.0);
146+
let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
147147

148-
sse4a::_mm_stream_ss(d, x);
148+
_mm_stream_ss(d, x);
149149
}
150150
assert_eq!(mem.data[0], 5.0);
151151
assert_eq!(mem.data[1], 2.0);

coresimd/src/x86/test.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,14 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
6363
union A { a: __m256, b: [f32; 8] };
6464
A { a }.b[idx]
6565
}
66+
67+
// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r which
68+
// doesn't exist on x86!
69+
#[cfg(target_arch = "x86")]
70+
#[target_feature(enable = "sse")]
71+
pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
72+
union A { a: __m128i, b: [i64; 2] };
73+
let mut a = A { a };
74+
a.b[idx as usize] = val;
75+
a.a
76+
}

stdsimd-verify/src/lib.rs

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
2525

2626
let mut files = Vec::new();
2727
walk(&root, &mut files);
28+
assert!(files.len() > 0);
2829

2930
let mut functions = Vec::new();
3031
for file in files {
@@ -35,6 +36,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
3536
}
3637
}
3738
}
39+
assert!(functions.len() > 0);
3840

3941
functions.retain(|f| {
4042
match f.vis {
@@ -48,10 +50,11 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
4850
.iter()
4951
.filter_map(|a| a.interpret_meta())
5052
.any(|a| match a {
51-
syn::Meta::NameValue(i) => i.ident == "target_feature",
53+
syn::Meta::List(i) => i.ident == "target_feature",
5254
_ => false,
5355
})
5456
});
57+
assert!(functions.len() > 0);
5558

5659
let input = proc_macro2::TokenStream::from(input);
5760

@@ -97,48 +100,24 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
97100
fn to_type(t: &syn::Type) -> Tokens {
98101
match *t {
99102
syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
100-
"__m128" => my_quote! { &F32x4 },
101-
"__m128d" => my_quote! { &F64x2 },
102-
"__m128i" => my_quote! { &I8x16 },
103-
"__m256i" => my_quote! { &I8x32 },
104-
"__m64" => my_quote! { &I8x8 },
103+
"__m128" => my_quote! { &M128 },
104+
"__m128d" => my_quote! { &M128D },
105+
"__m128i" => my_quote! { &M128I },
106+
"__m256" => my_quote! { &M256 },
107+
"__m256d" => my_quote! { &M256D },
108+
"__m256i" => my_quote! { &M256I },
109+
"__m64" => my_quote! { &M64 },
105110
"bool" => my_quote! { &BOOL },
106111
"f32" => my_quote! { &F32 },
107-
"f32x4" => my_quote! { &F32x4 },
108-
"f32x8" => my_quote! { &F32x8 },
109112
"f64" => my_quote! { &F64 },
110-
"f64x2" => my_quote! { &F64x2 },
111-
"f64x4" => my_quote! { &F64x4 },
112113
"i16" => my_quote! { &I16 },
113-
"i16x16" => my_quote! { &I16x16 },
114-
"i16x4" => my_quote! { &I16x4 },
115-
"i16x8" => my_quote! { &I16x8 },
116114
"i32" => my_quote! { &I32 },
117-
"i32x2" => my_quote! { &I32x2 },
118-
"i32x4" => my_quote! { &I32x4 },
119-
"i32x8" => my_quote! { &I32x8 },
120115
"i64" => my_quote! { &I64 },
121-
"i64x2" => my_quote! { &I64x2 },
122-
"i64x4" => my_quote! { &I64x4 },
123116
"i8" => my_quote! { &I8 },
124-
"i8x16" => my_quote! { &I8x16 },
125-
"i8x32" => my_quote! { &I8x32 },
126-
"i8x8" => my_quote! { &I8x8 },
127-
"u16x4" => my_quote! { &U16x4 },
128-
"u16x8" => my_quote! { &U16x8 },
117+
"u16" => my_quote! { &U16 },
129118
"u32" => my_quote! { &U32 },
130-
"u32x2" => my_quote! { &U32x2 },
131-
"u32x4" => my_quote! { &U32x4 },
132-
"u32x8" => my_quote! { &U32x8 },
133119
"u64" => my_quote! { &U64 },
134-
"u64x2" => my_quote! { &U64x2 },
135-
"u64x4" => my_quote! { &U64x4 },
136120
"u8" => my_quote! { &U8 },
137-
"u16" => my_quote! { &U16 },
138-
"u8x16" => my_quote! { &U8x16 },
139-
"u8x32" => my_quote! { &U8x32 },
140-
"u16x16" => my_quote! { &U16x16 },
141-
"u8x8" => my_quote! { &U8x8 },
142121
s => panic!("unspported type: {}", s),
143122
},
144123
syn::Type::Ptr(syn::TypePtr { ref elem, .. })
@@ -233,15 +212,34 @@ fn find_target_feature(
233212
.iter()
234213
.filter_map(|a| a.interpret_meta())
235214
.filter_map(|a| match a {
236-
syn::Meta::NameValue(i) => {
215+
syn::Meta::List(i) => {
237216
if i.ident == "target_feature" {
238-
Some(i.lit)
217+
Some(i.nested)
239218
} else {
240219
None
241220
}
242221
}
243222
_ => None,
244223
})
224+
.flat_map(|list| list)
225+
.filter_map(|nested| {
226+
match nested {
227+
syn::NestedMeta::Meta(m) => Some(m),
228+
syn::NestedMeta::Literal(_) => None,
229+
}
230+
})
231+
.filter_map(|m| {
232+
match m {
233+
syn::Meta::NameValue(i) => {
234+
if i.ident == "enable" {
235+
Some(i.lit)
236+
} else {
237+
None
238+
}
239+
}
240+
_ => None,
241+
}
242+
})
245243
.next()
246244
.expect(&format!("failed to find target_feature for {}", name))
247245
}

0 commit comments

Comments
 (0)