Skip to content

Commit 9158465

Browse files
TDecking authored and Amanieu committed
Replace addsub variations
1 parent 90543b0 commit 9158465

File tree

1 file changed

+25
-29
lines changed
  • crates/core_arch/src/x86

1 file changed

+25
-29
lines changed

crates/core_arch/src/x86/fma.rs

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
2020
2121
use crate::core_arch::x86::*;
22-
use crate::intrinsics::simd::{simd_fma, simd_insert, simd_neg};
22+
use crate::intrinsics::simd::{simd_fma, simd_insert, simd_neg, simd_shuffle};
2323
use crate::intrinsics::{fmaf32, fmaf64};
2424

2525
#[cfg(test)]
@@ -119,7 +119,9 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
119119
#[cfg_attr(test, assert_instr(vfmaddsub))]
120120
#[stable(feature = "simd_x86", since = "1.27.0")]
121121
pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
122-
vfmaddsubpd(a, b, c)
122+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
123+
let sub = simd_fma(a, b, simd_neg(c));
124+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector, [2, 1] takes lane 0 from `sub` and lane 1 from `add` — confirm.
simd_shuffle!(add, sub, [2, 1])
123125
}
124126

125127
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -132,7 +134,9 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
132134
#[cfg_attr(test, assert_instr(vfmaddsub))]
133135
#[stable(feature = "simd_x86", since = "1.27.0")]
134136
pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
135-
vfmaddsubpd256(a, b, c)
137+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
138+
let sub = simd_fma(a, b, simd_neg(c));
139+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector of 4 + 4 lanes, [4, 1, 6, 3] takes even lanes from `sub` and
// odd lanes from `add` — confirm.
simd_shuffle!(add, sub, [4, 1, 6, 3])
136140
}
137141

138142
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -145,7 +149,9 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
145149
#[cfg_attr(test, assert_instr(vfmaddsub))]
146150
#[stable(feature = "simd_x86", since = "1.27.0")]
147151
pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
148-
vfmaddsubps(a, b, c)
152+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
153+
let sub = simd_fma(a, b, simd_neg(c));
154+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector of 4 + 4 lanes, [4, 1, 6, 3] takes even lanes from `sub` and
// odd lanes from `add` — confirm.
simd_shuffle!(add, sub, [4, 1, 6, 3])
149155
}
150156

151157
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -158,7 +164,9 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
158164
#[cfg_attr(test, assert_instr(vfmaddsub))]
159165
#[stable(feature = "simd_x86", since = "1.27.0")]
160166
pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
161-
vfmaddsubps256(a, b, c)
167+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
168+
let sub = simd_fma(a, b, simd_neg(c));
169+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector of 8 + 8 lanes, indices 8, 10, 12, 14 take the even lanes from
// `sub` and 1, 3, 5, 7 take the odd lanes from `add` — confirm.
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
162170
}
163171

164172
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -255,7 +263,9 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
255263
#[cfg_attr(test, assert_instr(vfmsubadd))]
256264
#[stable(feature = "simd_x86", since = "1.27.0")]
257265
pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
258-
vfmsubaddpd(a, b, c)
266+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
267+
let sub = simd_fma(a, b, simd_neg(c));
268+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector, [0, 3] takes lane 0 from `add` and lane 1 from `sub` — the
// mirror image of the fmaddsub selection — confirm.
simd_shuffle!(add, sub, [0, 3])
259269
}
260270

261271
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -268,7 +278,9 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
268278
#[cfg_attr(test, assert_instr(vfmsubadd))]
269279
#[stable(feature = "simd_x86", since = "1.27.0")]
270280
pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
271-
vfmsubaddpd256(a, b, c)
281+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
282+
let sub = simd_fma(a, b, simd_neg(c));
283+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector of 4 + 4 lanes, [0, 5, 2, 7] takes even lanes from `add` and
// odd lanes from `sub` — confirm.
simd_shuffle!(add, sub, [0, 5, 2, 7])
272284
}
273285

274286
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -281,7 +293,9 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d
281293
#[cfg_attr(test, assert_instr(vfmsubadd))]
282294
#[stable(feature = "simd_x86", since = "1.27.0")]
283295
pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
284-
vfmsubaddps(a, b, c)
296+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
297+
let sub = simd_fma(a, b, simd_neg(c));
298+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector of 4 + 4 lanes, [0, 5, 2, 7] takes even lanes from `add` and
// odd lanes from `sub` — confirm.
simd_shuffle!(add, sub, [0, 5, 2, 7])
285299
}
286300

287301
/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
@@ -294,7 +308,9 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
294308
#[cfg_attr(test, assert_instr(vfmsubadd))]
295309
#[stable(feature = "simd_x86", since = "1.27.0")]
296310
pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
297-
vfmsubaddps256(a, b, c)
311+
// Evaluate `simd_fma` twice: once with `c` as given (`add`) and once with `c` negated (`sub`).
let add = simd_fma(a, b, c);
312+
let sub = simd_fma(a, b, simd_neg(c));
313+
// NOTE(review): assuming `simd_shuffle!` indexes `add` followed by `sub` as one
// concatenated vector of 8 + 8 lanes, indices 0, 2, 4, 6 take the even lanes from
// `add` and 9, 11, 13, 15 take the odd lanes from `sub` — confirm.
simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
298314
}
299315

300316
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
@@ -471,26 +487,6 @@ pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
471487
)
472488
}
473489

474-
// Foreign declarations for the alternating add/subtract FMA operations; each `link_name`
// gives the `llvm.x86.fma.*` symbol the declaration binds to, in 128-bit and `.256` forms
// for both `pd` and `ps`.
// NOTE(review): `improper_ctypes` is presumably allowed because the `__m128*`/`__m256*`
// vector types trip the FFI-safety lint — confirm.
#[allow(improper_ctypes)]
475-
extern "C" {
476-
#[link_name = "llvm.x86.fma.vfmaddsub.pd"]
477-
fn vfmaddsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
478-
#[link_name = "llvm.x86.fma.vfmaddsub.pd.256"]
479-
fn vfmaddsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
480-
#[link_name = "llvm.x86.fma.vfmaddsub.ps"]
481-
fn vfmaddsubps(a: __m128, b: __m128, c: __m128) -> __m128;
482-
#[link_name = "llvm.x86.fma.vfmaddsub.ps.256"]
483-
fn vfmaddsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
484-
#[link_name = "llvm.x86.fma.vfmsubadd.pd"]
485-
fn vfmsubaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
486-
#[link_name = "llvm.x86.fma.vfmsubadd.pd.256"]
487-
fn vfmsubaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
488-
#[link_name = "llvm.x86.fma.vfmsubadd.ps"]
489-
fn vfmsubaddps(a: __m128, b: __m128, c: __m128) -> __m128;
490-
#[link_name = "llvm.x86.fma.vfmsubadd.ps.256"]
491-
fn vfmsubaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
492-
}
493-
494490
#[cfg(test)]
495491
mod tests {
496492

0 commit comments

Comments
 (0)