Skip to content

Commit 3d7712b

Browse files
committed
Use fmuladd for fma and document this behavior
1 parent 4e89670 commit 3d7712b

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

src/api/math/float/fma.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,19 @@ macro_rules! impl_math_float_fma {
55
impl $id {
66
/// Fused multiply add: `self * y + z`
77
///
8+
/// On some architectures, it is possible to combine a multiply
9+
/// followed by an addition in a single instruction.
10+
/// Besides performance, this may also offer better precision
11+
/// than performing the operations individually.
12+
///
13+
/// Note that using this function does **not** guarantee that an FMA
14+
/// instruction will be emitted;
15+
/// the architecture may not support it, or the compiler may decide
16+
/// it's more efficient in a specific case not to use it.
17+
///
18+
/// Use your architecture's intrinsic if you absolutely require
19+
/// the single-rounding precision of a fused operation in all circumstances.
20+
///
821
/// Most architectures which have support for FMA
922
/// also have an equivalent version of this function,
1023
/// fused multiply subtract (`self * y - z`).

src/codegen/math/float/fma.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,25 @@ crate trait Fma {
1111
#[cfg(not(target_arch = "s390x"))]
1212
#[allow(improper_ctypes)]
1313
extern "C" {
14-
#[link_name = "llvm.fma.v2f32"]
14+
// We use the `fmuladd` intrinsic instead of `fma` to allow LLVM to decide
15+
// on a per-case basis whether it's better to use FMA or not.
16+
#[link_name = "llvm.fmuladd.v2f32"]
1517
fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
16-
#[link_name = "llvm.fma.v4f32"]
18+
#[link_name = "llvm.fmuladd.v4f32"]
1719
fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
18-
#[link_name = "llvm.fma.v8f32"]
20+
#[link_name = "llvm.fmuladd.v8f32"]
1921
fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
20-
#[link_name = "llvm.fma.v16f32"]
22+
#[link_name = "llvm.fmuladd.v16f32"]
2123
fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
2224
/* FIXME 64-bit single elem vectors
23-
#[link_name = "llvm.fma.v1f64"]
25+
#[link_name = "llvm.fmuladd.v1f64"]
2426
fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
2527
*/
26-
#[link_name = "llvm.fma.v2f64"]
28+
#[link_name = "llvm.fmuladd.v2f64"]
2729
fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
28-
#[link_name = "llvm.fma.v4f64"]
30+
#[link_name = "llvm.fmuladd.v4f64"]
2931
fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
30-
#[link_name = "llvm.fma.v8f64"]
32+
#[link_name = "llvm.fmuladd.v8f64"]
3133
fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
3234
}
3335

0 commit comments

Comments
 (0)