rust-lang · vbarrielle · Oct 5, 2017 · Oct 5, 2017
diff --git a/src/x86/avx.rs b/src/x86/avx.rs
@@ -139,6 +139,52 @@ pub unsafe fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 {
     addsubps256(a, b)
 }
 
+/// Horizontal addition of adjacent pairs in the two packed vectors
+/// of 4 64-bit floating points `a` and `b`.
+/// In the result, sums of pairs from `a` are returned in even locations
+/// (0, 2), while sums of pairs from `b` are returned in odd locations (1, 3).
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vhaddpd))]
+pub unsafe fn _mm256_hadd_pd(a: f64x4, b: f64x4) -> f64x4 {
+    haddpd256(a, b)
+}
+
+/// Horizontal addition of adjacent pairs in the two packed vectors
+/// of 8 32-bit floating points `a` and `b`.
+/// In the result, sums of elements from `a` are returned in locations of
+/// indices 0, 1, 4, 5; while sums of elements from `b` are returned in
+/// locations 2, 3, 6, 7.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vhaddps))]
+pub unsafe fn _mm256_hadd_ps(a: f32x8, b: f32x8) -> f32x8 {
+    haddps256(a, b)
+}
+
+/// Horizontal subtraction of adjacent pairs in the two packed vectors
+/// of 4 64-bit floating points `a` and `b`.
+/// In the result, differences of pairs from `a` (first element minus second)
+/// are returned in even locations, and those from `b` in odd locations.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vhsubpd))]
+pub unsafe fn _mm256_hsub_pd(a: f64x4, b: f64x4) -> f64x4 {
+    hsubpd256(a, b)
+}
+
+/// Horizontal subtraction of adjacent pairs in the two packed vectors
+/// of 8 32-bit floating points `a` and `b`.
+/// In the result, differences of elements from `a` are returned in locations
+/// of indices 0, 1, 4, 5; while differences of elements from `b` are
+/// returned in locations 2, 3, 6, 7.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vhsubps))]
+pub unsafe fn _mm256_hsub_ps(a: f32x8, b: f32x8) -> f32x8 {
+    hsubps256(a, b)
+}
+
 /// Subtract packed double-precision (64-bit) floating-point elements in `b`
 /// from packed elements in `a`.
 #[inline(always)]
@@ -281,6 +327,14 @@ extern "C" {
     fn addsubpd256(a: f64x4, b: f64x4) -> f64x4;
     #[link_name = "llvm.x86.avx.addsub.ps.256"]
     fn addsubps256(a: f32x8, b: f32x8) -> f32x8;
+    #[link_name = "llvm.x86.avx.hadd.pd.256"]
+    fn haddpd256(a: f64x4, b: f64x4) -> f64x4; // called by `_mm256_hadd_pd`
+    #[link_name = "llvm.x86.avx.hadd.ps.256"]
+    fn haddps256(a: f32x8, b: f32x8) -> f32x8; // called by `_mm256_hadd_ps`
+    #[link_name = "llvm.x86.avx.hsub.pd.256"]
+    fn hsubpd256(a: f64x4, b: f64x4) -> f64x4; // called by `_mm256_hsub_pd`
+    #[link_name = "llvm.x86.avx.hsub.ps.256"]
+    fn hsubps256(a: f32x8, b: f32x8) -> f32x8; // called by `_mm256_hsub_ps`
     #[link_name = "llvm.x86.avx.max.pd.256"]
     fn maxpd256(a: f64x4, b: f64x4) -> f64x4;
     #[link_name = "llvm.x86.avx.max.ps.256"]
@@ -432,6 +486,43 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test = "avx"]
+    unsafe fn _mm256_hadd_pd() {
+        let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
+        let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
+        let r = avx::_mm256_hadd_pd(a, b);
+        let e = f64x4::new(3.0, 11.0, 7.0, 15.0); // (a0+a1, b0+b1, a2+a3, b2+b3)
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_hadd_ps() {
+        let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0);
+        let b = f32x8::new(5.0, 6.0, 7.0, 8.0, 5.0, 6.0, 7.0, 8.0);
+        let r = avx::_mm256_hadd_ps(a, b); // each 4-wide half: a's two pair sums, then b's
+        let e = f32x8::new(3.0, 7.0, 11.0, 15.0, 3.0, 7.0, 11.0, 15.0);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_hsub_pd() {
+        let a = f64x4::new(1.0, 2.0, 3.0, 4.0);
+        let b = f64x4::new(5.0, 6.0, 7.0, 8.0);
+        let r = avx::_mm256_hsub_pd(a, b);
+        let e = f64x4::new(-1., -1., -1., -1.); // (a0-a1, b0-b1, a2-a3, b2-b3)
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_hsub_ps() {
+        let a = f32x8::new(1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0);
+        let b = f32x8::new(5.0, 6.0, 7.0, 8.0, 5.0, 6.0, 7.0, 8.0);
+        let r = avx::_mm256_hsub_ps(a, b); // each 4-wide half: a's two pair differences, then b's
+        let e = f32x8::new(-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0);
+        assert_eq!(r, e);
+    }
+
+
     #[simd_test = "avx"]
     unsafe fn _mm256_sub_pd() {
         let a = f64x4::new(1.0, 2.0, 3.0, 4.0);