diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs
index c965375a2f..a2ea361d76 100644
--- a/coresimd/src/x86/i586/avx2.rs
+++ b/coresimd/src/x86/i586/avx2.rs
@@ -3162,8 +3162,6 @@ extern "C" {
 mod tests {
     use stdsimd_test::simd_test;
 
-    use v256::*;
-    use v128::*;
     use x86::*;
     use std;
 
@@ -3423,17 +3421,16 @@ mod tests {
     unsafe fn test_mm256_and_si256() {
         let a = _mm256_set1_epi8(5);
         let b = _mm256_set1_epi8(3);
-        let got = _mm256_and_si256(__m256i::from(a), __m256i::from(b));
-        assert_eq!(got, __m256i::from(_mm256_set1_epi8(1)));
+        let got = _mm256_and_si256(a, b);
+        assert_eq!(got, _mm256_set1_epi8(1));
     }
 
     #[simd_test = "avx2"]
     unsafe fn test_mm256_andnot_si256() {
         let a = _mm256_set1_epi8(5);
         let b = _mm256_set1_epi8(3);
-        let got =
-            _mm256_andnot_si256(__m256i::from(a), __m256i::from(b));
-        assert_eq!(got, __m256i::from(_mm256_set1_epi8(2)));
+        let got = _mm256_andnot_si256(a, b);
+        assert_eq!(got, _mm256_set1_epi8(2));
     }
 
     #[simd_test = "avx2"]
@@ -3774,9 +3771,9 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn test_mm256_extracti128_si256() {
-        let a = __m256i::from(_mm256_setr_epi64x(1, 2, 3, 4));
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let r = _mm256_extracti128_si256(a, 0b01);
-        let e = __m128i::from(_mm_setr_epi64x(3, 4));
+        let e = _mm_setr_epi64x(3, 4);
         assert_eq!(r, e);
     }
 
@@ -3850,11 +3847,11 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn test_mm256_inserti128_si256() {
-        let a = __m256i::from(_mm256_setr_epi64x(1, 2, 3, 4));
-        let b = __m128i::from(_mm_setr_epi64x(7, 8));
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
+        let b = _mm_setr_epi64x(7, 8);
         let r = _mm256_inserti128_si256(a, b, 0b01);
         let e = _mm256_setr_epi64x(1, 2, 7, 8);
-        assert_eq!(r, __m256i::from(e));
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
@@ -4124,8 +4121,8 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn test_mm256_or_si256() {
-        let a = __m256i::from(_mm256_set1_epi8(-1));
-        let b = __m256i::from(_mm256_set1_epi8(0));
+        let a = _mm256_set1_epi8(-1);
+        let b = _mm256_set1_epi8(0);
         let r = _mm256_or_si256(a, b);
         assert_eq!(r, a);
     }
@@ -4301,8 +4298,8 @@ mod tests {
     #[simd_test = "avx2"]
     unsafe fn test_mm256_slli_si256() {
         let a = _mm256_set1_epi64x(0xFFFFFFFF);
-        let r = _mm256_slli_si256(__m256i::from(a), 3);
-        assert_eq!(r, __m256i::from(_mm256_set1_epi64x(0xFFFFFFFF000000)));
+        let r = _mm256_slli_si256(a, 3);
+        assert_eq!(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
     }
 
     #[simd_test = "avx2"]
@@ -4400,7 +4397,7 @@ mod tests {
             17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
         );
-        let r = _mm256_srli_si256(__m256i::from(a), 3);
+        let r = _mm256_srli_si256(a, 3);
         #[cfg_attr(rustfmt, rustfmt_skip)]
         let e = _mm256_setr_epi8(
             4, 5, 6, 7, 8, 9, 10, 11,
@@ -4408,7 +4405,7 @@ mod tests {
             20, 21, 22, 23, 24, 25, 26, 27,
             28, 29, 30, 31, 32, 0, 0, 0,
         );
-        assert_eq!(r, __m256i::from(e));
+        assert_eq!(r, e);
     }
 
     #[simd_test = "avx2"]
@@ -4561,10 +4558,10 @@ mod tests {
 
     #[simd_test = "avx2"]
     unsafe fn test_mm256_xor_si256() {
-        let a = __m256i::from(_mm256_set1_epi8(5));
-        let b = __m256i::from(_mm256_set1_epi8(3));
+        let a = _mm256_set1_epi8(5);
+        let b = _mm256_set1_epi8(3);
         let r = _mm256_xor_si256(a, b);
-        assert_eq!(r, __m256i::from(_mm256_set1_epi8(6)));
+        assert_eq!(r, _mm256_set1_epi8(6));
     }
 
     #[simd_test = "avx2"]
diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs
index 79f4066340..e0ed08fe64 100644
--- a/coresimd/src/x86/i686/sse4a.rs
+++ b/coresimd/src/x86/i686/sse4a.rs
@@ -2,6 +2,7 @@
 
 use core::mem;
 use v128::*;
+use x86::*;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -13,9 +14,9 @@ extern "C" {
     #[link_name = "llvm.x86.sse4a.insertq"]
     fn insertq(x: i64x2, y: i64x2) -> i64x2;
     #[link_name = "llvm.x86.sse4a.movnt.sd"]
-    fn movntsd(x: *mut f64, y: f64x2);
+    fn movntsd(x: *mut f64, y: __m128d);
     #[link_name = "llvm.x86.sse4a.movnt.ss"]
-    fn movntss(x: *mut f32, y: f32x4);
+    fn movntss(x: *mut f32, y: __m128);
 }
 
 // FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
@@ -35,8 +36,8 @@ extern "C" {
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(extrq))]
-pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
-    extrq(x, mem::transmute(y))
+pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
+    mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
 }
 
 /// Inserts the `[length:0]` bits of `y` into `x` at `index`.
@@ -51,15 +52,15 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(insertq))]
-pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
-    insertq(x, y)
+pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
+    mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
 }
 
 /// Non-temporal store of `a.0` into `p`.
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntsd))]
-pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
+pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
     movntsd(p, a);
 }
 
@@ -67,43 +68,42 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntss))]
-pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
+pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
     movntss(p, a);
 }
 
 #[cfg(test)]
 mod tests {
     use stdsimd_test::simd_test;
-    use x86::i686::sse4a;
-    use v128::*;
+    use x86::*;
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_extract_si64() {
+    unsafe fn test_mm_extract_si64() {
         let b = 0b0110_0000_0000_i64;
         //        ^^^^ bit range extracted
-        let x = i64x2::new(b, 0);
+        let x = _mm_setr_epi64x(b, 0);
         let v = 0b001000___00___000100_i64;
         //        ^idx: 2^3 = 8 ^length = 2^2 = 4
-        let y = i64x2::new(v, 0);
-        let e = i64x2::new(0b0110_i64, 0);
-        let r = sse4a::_mm_extract_si64(x, y);
+        let y = _mm_setr_epi64x(v, 0);
+        let e = _mm_setr_epi64x(0b0110_i64, 0);
+        let r = _mm_extract_si64(x, y);
         assert_eq!(r, e);
     }
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_insert_si64() {
+    unsafe fn test_mm_insert_si64() {
         let i = 0b0110_i64;
         //        ^^^^ bit range inserted
         let z = 0b1010_1010_1010i64;
         //        ^^^^ bit range replaced
         let e = 0b0110_1010_1010i64;
         //        ^^^^ replaced 1010 with 0110
-        let x = i64x2::new(z, 0);
-        let expected = i64x2::new(e, 0);
+        let x = _mm_setr_epi64x(z, 0);
+        let expected = _mm_setr_epi64x(e, 0);
         let v = 0b001000___00___000100_i64;
         //        ^idx: 2^3 = 8 ^length = 2^2 = 4
-        let y = i64x2::new(i, v);
-        let r = sse4a::_mm_insert_si64(x, y);
+        let y = _mm_setr_epi64x(i, v);
+        let r = _mm_insert_si64(x, y);
         assert_eq!(r, expected);
     }
 
@@ -113,7 +113,7 @@ mod tests {
     }
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_stream_sd() {
+    unsafe fn test_mm_stream_sd() {
         let mut mem = MemoryF64 {
             data: [1.0_f64, 2.0],
         };
@@ -121,9 +121,9 @@ mod tests {
             let vals = &mut mem.data;
             let d = vals.as_mut_ptr();
 
-            let x = f64x2::new(3.0, 4.0);
+            let x = _mm_setr_pd(3.0, 4.0);
 
-            sse4a::_mm_stream_sd(d, x);
+            _mm_stream_sd(d, x);
         }
         assert_eq!(mem.data[0], 3.0);
         assert_eq!(mem.data[1], 2.0);
@@ -135,7 +135,7 @@ mod tests {
     }
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_stream_ss() {
+    unsafe fn test_mm_stream_ss() {
         let mut mem = MemoryF32 {
             data: [1.0_f32, 2.0, 3.0, 4.0],
         };
@@ -143,9 +143,9 @@ mod tests {
             let vals = &mut mem.data;
             let d = vals.as_mut_ptr();
 
-            let x = f32x4::new(5.0, 6.0, 7.0, 8.0);
+            let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
 
-            sse4a::_mm_stream_ss(d, x);
+            _mm_stream_ss(d, x);
         }
         assert_eq!(mem.data[0], 5.0);
         assert_eq!(mem.data[1], 2.0);
diff --git a/coresimd/src/x86/test.rs b/coresimd/src/x86/test.rs
index ad36b813a0..94cd3860dc 100644
--- a/coresimd/src/x86/test.rs
+++ b/coresimd/src/x86/test.rs
@@ -63,3 +63,14 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
     union A { a: __m256, b: [f32; 8] };
     A { a }.b[idx]
 }
+
+// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r which
+// doesn't exist on x86!
+#[cfg(target_arch = "x86")]
+#[target_feature(enable = "avx")]
+pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
+    union A { a: __m128i, b: [i64; 2] };
+    let mut a = A { a };
+    a.b[idx as usize] = val;
+    a.a
+}
diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs
index 5fe888c24c..686bd4737b 100644
--- a/stdsimd-verify/src/lib.rs
+++ b/stdsimd-verify/src/lib.rs
@@ -25,6 +25,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
 
     let mut files = Vec::new();
     walk(&root, &mut files);
+    assert!(files.len() > 0);
 
     let mut functions = Vec::new();
     for file in files {
@@ -35,6 +36,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
             }
         }
     }
+    assert!(functions.len() > 0);
 
     functions.retain(|f| {
         match f.vis {
@@ -48,10 +50,11 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
             .iter()
             .filter_map(|a| a.interpret_meta())
             .any(|a| match a {
-                syn::Meta::NameValue(i) => i.ident == "target_feature",
+                syn::Meta::List(i) => i.ident == "target_feature",
                 _ => false,
             })
     });
+    assert!(functions.len() > 0);
 
     let input = proc_macro2::TokenStream::from(input);
 
@@ -97,48 +100,24 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
 fn to_type(t: &syn::Type) -> Tokens {
     match *t {
         syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
-            "__m128" => my_quote! { &F32x4 },
-            "__m128d" => my_quote! { &F64x2 },
-            "__m128i" => my_quote! { &I8x16 },
-            "__m256i" => my_quote! { &I8x32 },
-            "__m64" => my_quote! { &I8x8 },
+            "__m128" => my_quote! { &M128 },
+            "__m128d" => my_quote! { &M128D },
+            "__m128i" => my_quote! { &M128I },
+            "__m256" => my_quote! { &M256 },
+            "__m256d" => my_quote! { &M256D },
+            "__m256i" => my_quote! { &M256I },
+            "__m64" => my_quote! { &M64 },
             "bool" => my_quote! { &BOOL },
             "f32" => my_quote! { &F32 },
-            "f32x4" => my_quote! { &F32x4 },
-            "f32x8" => my_quote! { &F32x8 },
             "f64" => my_quote! { &F64 },
-            "f64x2" => my_quote! { &F64x2 },
-            "f64x4" => my_quote! { &F64x4 },
             "i16" => my_quote! { &I16 },
-            "i16x16" => my_quote! { &I16x16 },
-            "i16x4" => my_quote! { &I16x4 },
-            "i16x8" => my_quote! { &I16x8 },
             "i32" => my_quote! { &I32 },
-            "i32x2" => my_quote! { &I32x2 },
-            "i32x4" => my_quote! { &I32x4 },
-            "i32x8" => my_quote! { &I32x8 },
             "i64" => my_quote! { &I64 },
-            "i64x2" => my_quote! { &I64x2 },
-            "i64x4" => my_quote! { &I64x4 },
             "i8" => my_quote! { &I8 },
-            "i8x16" => my_quote! { &I8x16 },
-            "i8x32" => my_quote! { &I8x32 },
-            "i8x8" => my_quote! { &I8x8 },
-            "u16x4" => my_quote! { &U16x4 },
-            "u16x8" => my_quote! { &U16x8 },
+            "u16" => my_quote! { &U16 },
             "u32" => my_quote! { &U32 },
-            "u32x2" => my_quote! { &U32x2 },
-            "u32x4" => my_quote! { &U32x4 },
-            "u32x8" => my_quote! { &U32x8 },
             "u64" => my_quote! { &U64 },
-            "u64x2" => my_quote! { &U64x2 },
-            "u64x4" => my_quote! { &U64x4 },
             "u8" => my_quote! { &U8 },
-            "u16" => my_quote! { &U16 },
-            "u8x16" => my_quote! { &U8x16 },
-            "u8x32" => my_quote! { &U8x32 },
-            "u16x16" => my_quote! { &U16x16 },
-            "u8x8" => my_quote! { &U8x8 },
             s => panic!("unspported type: {}", s),
         },
         syn::Type::Ptr(syn::TypePtr { ref elem, .. })
@@ -233,15 +212,34 @@ fn find_target_feature(
         .iter()
         .filter_map(|a| a.interpret_meta())
         .filter_map(|a| match a {
-            syn::Meta::NameValue(i) => {
+            syn::Meta::List(i) => {
                 if i.ident == "target_feature" {
-                    Some(i.lit)
+                    Some(i.nested)
                 } else {
                     None
                 }
             }
             _ => None,
         })
+        .flat_map(|list| list)
+        .filter_map(|nested| {
+            match nested {
+                syn::NestedMeta::Meta(m) => Some(m),
+                syn::NestedMeta::Literal(_) => None,
+            }
+        })
+        .filter_map(|m| {
+            match m {
+                syn::Meta::NameValue(i) => {
+                    if i.ident == "enable" {
+                        Some(i.lit)
+                    } else {
+                        None
+                    }
+                }
+                _ => None,
+            }
+        })
         .next()
         .expect(&format!("failed to find target_feature for {}", name))
 }
diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs
index 87c1a1fc87..c4ac03543d 100644
--- a/stdsimd-verify/tests/x86-intel.rs
+++ b/stdsimd-verify/tests/x86-intel.rs
@@ -22,51 +22,37 @@ struct Function {
 
 static BOOL: Type = Type::Bool;
 static F32: Type = Type::PrimFloat(32);
-static F32x4: Type = Type::Float(32, 4);
-static F32x8: Type = Type::Float(32, 8);
 static F64: Type = Type::PrimFloat(64);
-static F64x2: Type = Type::Float(64, 2);
-static F64x4: Type = Type::Float(64, 4);
 static I16: Type = Type::PrimSigned(16);
-static I16x16: Type = Type::Signed(16, 16);
-static I16x4: Type = Type::Signed(16, 4);
-static I16x8: Type = Type::Signed(16, 8);
 static I32: Type = Type::PrimSigned(32);
-static I32x2: Type = Type::Signed(32, 2);
-static I32x4: Type = Type::Signed(32, 4);
-static I32x8: Type = Type::Signed(32, 8);
 static I64: Type = Type::PrimSigned(64);
-static I64x2: Type = Type::Signed(64, 2);
-static I64x4: Type = Type::Signed(64, 4);
 static I8: Type = Type::PrimSigned(8);
-static I8x16: Type = Type::Signed(8, 16);
-static I8x32: Type = Type::Signed(8, 32);
-static I8x8: Type = Type::Signed(8, 8);
 static U16: Type = Type::PrimUnsigned(16);
-static U16x16: Type = Type::Unsigned(16, 16);
-// static U16x4: Type = Type::Unsigned(16, 4);
-static U16x8: Type = Type::Unsigned(16, 8);
 static U32: Type = Type::PrimUnsigned(32);
-static U32x2: Type = Type::Unsigned(32, 2);
-static U32x4: Type = Type::Unsigned(32, 4);
-static U32x8: Type = Type::Unsigned(32, 8);
 static U64: Type = Type::PrimUnsigned(64);
-static U64x2: Type = Type::Unsigned(64, 2);
-static U64x4: Type = Type::Unsigned(64, 4);
 static U8: Type = Type::PrimUnsigned(8);
-static U8x16: Type = Type::Unsigned(8, 16);
-static U8x32: Type = Type::Unsigned(8, 32);
-// static U8x8: Type = Type::Unsigned(8, 8);
+
+static M64: Type = Type::M64;
+static M128: Type = Type::M128;
+static M128I: Type = Type::M128I;
+static M128D: Type = Type::M128D;
+static M256: Type = Type::M256;
+static M256I: Type = Type::M256I;
+static M256D: Type = Type::M256D;
 
 #[derive(Debug)]
 enum Type {
-    Float(u8, u8),
     PrimFloat(u8),
     PrimSigned(u8),
     PrimUnsigned(u8),
     Ptr(&'static Type),
-    Signed(u8, u8),
-    Unsigned(u8, u8),
+    M64,
+    M128,
+    M128D,
+    M128I,
+    M256,
+    M256D,
+    M256I,
     Bool,
 }
 
@@ -271,33 +257,22 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
         (&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {}
         (&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {}
 
-        (&Type::Signed(a, b), "__m128i")
-        | (&Type::Unsigned(a, b), "__m128i")
-        | (&Type::Ptr(&Type::Signed(a, b)), "__m128i*")
-        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
-
-        (&Type::Signed(a, b), "__m256i")
-        | (&Type::Unsigned(a, b), "__m256i")
-        | (&Type::Ptr(&Type::Signed(a, b)), "__m256i*")
-        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*")
-            if (a as u32) * (b as u32) == 256 => {}
-
-        (&Type::Signed(a, b), "__m64")
-        | (&Type::Unsigned(a, b), "__m64")
-        | (&Type::Ptr(&Type::Signed(a, b)), "__m64*")
-        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
-
-        (&Type::Float(32, 4), "__m128") => {}
-        (&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {}
-
-        (&Type::Float(64, 2), "__m128d") => {}
-        (&Type::Ptr(&Type::Float(64, 2)), "__m128d*") => {}
+        (&Type::M64, "__m64")
+        | (&Type::Ptr(&Type::M64), "__m64*") => {}
 
-        (&Type::Float(32, 8), "__m256") => {}
-        (&Type::Ptr(&Type::Float(32, 8)), "__m256*") => {}
+        (&Type::M128I, "__m128i")
+        | (&Type::Ptr(&Type::M128I), "__m128i*")
+        | (&Type::M128D, "__m128d")
+        | (&Type::Ptr(&Type::M128D), "__m128d*")
+        | (&Type::M128, "__m128")
+        | (&Type::Ptr(&Type::M128), "__m128*") => {}
 
-        (&Type::Float(64, 4), "__m256d") => {}
-        (&Type::Ptr(&Type::Float(64, 4)), "__m256d*") => {}
+        (&Type::M256I, "__m256i")
+        | (&Type::Ptr(&Type::M256I), "__m256i*")
+        | (&Type::M256D, "__m256d")
+        | (&Type::Ptr(&Type::M256D), "__m256d*")
+        | (&Type::M256, "__m256")
+        | (&Type::Ptr(&Type::M256), "__m256*") => {}
 
         // These two intrinsics return a 16-bit element but in Intel's
         // intrinsics they're listed as returning an `int`.
@@ -311,7 +286,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
         // This is a macro (?) in C which seems to mutate its arguments, but
         // that means that we're taking pointers to arguments in rust
         // as we're not exposing it as a macro.
-        (&Type::Ptr(&Type::Float(32, 4)), "__m128")
+        (&Type::Ptr(&Type::M128), "__m128")
             if intrinsic == "_MM_TRANSPOSE4_PS" => {}
 
         // These intrinsics return an `int` in C but they're always either the