From 3deff8419610470146cbffc8210116cf7e7f9509 Mon Sep 17 00:00:00 2001
From: Tomek Czajka <tczajka@gmail.com>
Date: Tue, 10 Dec 2024 15:03:01 +0100
Subject: [PATCH 1/5] Correctly rounded floating point `div_euclid`.

Fixes #107904.
---
 library/std/src/f128.rs                  |  33 ++--
 library/std/src/f128/div_euclid.rs       | 204 +++++++++++++++++++++++
 library/std/src/f128/div_euclid/tests.rs |  23 +++
 library/std/src/f128/tests.rs            | 117 ++++++++++++-
 library/std/src/f128/u256.rs             |  63 +++++++
 library/std/src/f128/u256/tests.rs       |  39 +++++
 library/std/src/f16.rs                   |  32 ++--
 library/std/src/f16/div_euclid.rs        | 111 ++++++++++++
 library/std/src/f16/div_euclid/tests.rs  |  13 ++
 library/std/src/f16/tests.rs             |  61 +++++++
 library/std/src/f32.rs                   |  32 ++--
 library/std/src/f32/div_euclid.rs        | 204 +++++++++++++++++++++++
 library/std/src/f32/div_euclid/tests.rs  |  21 +++
 library/std/src/f32/tests.rs             | 105 ++++++++++++
 library/std/src/f64.rs                   |  32 ++--
 library/std/src/f64/div_euclid.rs        | 204 +++++++++++++++++++++++
 library/std/src/f64/div_euclid/tests.rs  |  21 +++
 library/std/src/f64/tests.rs             | 116 +++++++++++++
 library/std/src/lib.rs                   |   1 +
 19 files changed, 1367 insertions(+), 65 deletions(-)
 create mode 100644 library/std/src/f128/div_euclid.rs
 create mode 100644 library/std/src/f128/div_euclid/tests.rs
 create mode 100644 library/std/src/f128/u256.rs
 create mode 100644 library/std/src/f128/u256/tests.rs
 create mode 100644 library/std/src/f16/div_euclid.rs
 create mode 100644 library/std/src/f16/div_euclid/tests.rs
 create mode 100644 library/std/src/f32/div_euclid.rs
 create mode 100644 library/std/src/f32/div_euclid/tests.rs
 create mode 100644 library/std/src/f64/div_euclid.rs
 create mode 100644 library/std/src/f64/div_euclid/tests.rs

diff --git a/library/std/src/f128.rs b/library/std/src/f128.rs
index e93e915159e40..56f3df1b4aa95 100644
--- a/library/std/src/f128.rs
+++ b/library/std/src/f128.rs
@@ -4,6 +4,9 @@
 //!
 //! Mathematically significant numbers are provided in the `consts` sub-module.
 
+mod div_euclid;
+mod u256;
+
 #[cfg(test)]
 mod tests;
 
@@ -235,10 +238,14 @@ impl f128 {
 
     /// Calculates Euclidean division, the matching method for `rem_euclid`.
     ///
-    /// This computes the integer `n` such that
+    /// In infinite precision this computes the integer `n` such that
     /// `self = n * rhs + self.rem_euclid(rhs)`.
-    /// In other words, the result is `self / rhs` rounded to the integer `n`
-    /// such that `self >= n * rhs`.
+    /// In other words, the result is `self / rhs` rounded to the
+    /// integer `n` such that `self >= n * rhs`.
+    ///
+    /// However, due to a floating point round-off error the result can be rounded if
+    /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented
+    /// exactly.
     ///
     /// # Precision
     ///
@@ -264,29 +271,23 @@ impl f128 {
     #[unstable(feature = "f128", issue = "116909")]
     #[must_use = "method returns a new number and does not mutate the original value"]
     pub fn div_euclid(self, rhs: f128) -> f128 {
-        let q = (self / rhs).trunc();
-        if self % rhs < 0.0 {
-            return if rhs > 0.0 { q - 1.0 } else { q + 1.0 };
-        }
-        q
+        div_euclid::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
     ///
-    /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in
-    /// most cases. However, due to a floating point round-off error it can
-    /// result in `r == rhs.abs()`, violating the mathematical definition, if
-    /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
-    /// This result is not an element of the function's codomain, but it is the
-    /// closest floating point number in the real numbers and thus fulfills the
-    /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)`
-    /// approximately.
+    /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`.
+    ///
+    /// However, due to a floating point round-off error the result can round to
+    /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
     ///
     /// # Precision
     ///
     /// The result of this operation is guaranteed to be the rounded
     /// infinite-precision result.
     ///
+    /// The result is precise when `self >= 0.0`.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/library/std/src/f128/div_euclid.rs b/library/std/src/f128/div_euclid.rs
new file mode 100644
index 0000000000000..38334ac674aaf
--- /dev/null
+++ b/library/std/src/f128/div_euclid.rs
@@ -0,0 +1,204 @@
+#[cfg(test)]
+mod tests;
+
+use crate::f128::u256::U256;
+
+/// Software implementation of `f128::div_euclid`.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: f128, b: f128) -> f128 {
+    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
+        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
+    {
+        let exp = a_exp - b_exp;
+        match (a_neg, b_neg) {
+            (false, false) => div_floor(exp, a_m, b_m),
+            (true, false) => -div_ceil(exp, a_m, b_m),
+            (false, true) => -div_floor(exp, a_m, b_m),
+            (true, true) => div_ceil(exp, a_m, b_m),
+        }
+    } else {
+        // `a` or `b` are +-0.0 or infinity or NaN.
+        // `a / b` is also +-0.0 or infinity or NaN.
+        // There is no need to round to an integer.
+        a / b
+    }
+}
+
+/// Returns `floor((a << exp) / b)`.
+///
+/// Requires `2^112 <= a, b < 2^113`.
+fn div_floor(exp: i32, a: u128, b: u128) -> f128 {
+    if exp < 0 {
+        0.0
+    } else if exp <= 15 {
+        // aa < (2^113 << 15) = 2^128
+        let aa = a << exp;
+        // q < 2^128 / 2^112 = 2^16
+        let q = (aa / b) as u32;
+        // We have to use `as` because `From<u32> for f128` is not yet implemented.
+        q as f128
+    } else if exp <= 127 {
+        // aa = a << exp
+        // aa < (2^113 << 127) = 2^240
+        let aa = U256::shl_u128(a, exp as u32);
+        // q < 2^240 / 2^112 = 2^128
+        let (q, _) = aa.div_rem(b);
+        q as f128
+    } else {
+        // aa >= (2^112 << 127) = 2^239
+        // aa < (2^113 << 127) = 2^240
+        let aa = U256::shl_u128(a, 127);
+        // e > 0
+        // The result is floor((aa << e) / b).
+        let e = (exp - 127) as u32;
+
+        // aa = q * b + r
+        // q >= 2^239 / 2^113 = 2^126
+        // q < 2^239 / 2^112 = 2^128
+        // 0 <= r < b
+        let (q, r) = aa.div_rem(b);
+
+        // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
+        // 0 <= (r << e) / b < 2^e
+        //
+        // There are two cases:
+        // 1. floor((r << e) / b) = 0
+        // 2. 0 < floor((r << e) / b) < 2^e
+        //
+        // In case 1:
+        // The result is q << e.
+        //
+        // In case 2:
+        // The result is (q << e) + non-zero low e bits.
+        // This rounds the same way as (q | 1) << e because rounding beyond
+        // the 25 most significant bits of q depends only on whether the low-order
+        // bits are non-zero.
+        //
+        // Case 1 happens when:
+        // (r << e) / b < 1
+        // (r << e) <= b - 1
+        // r <= ((b - 1) >> e)
+        let case_1_bound = if e < 128 { (b - 1) >> e } else { 0 };
+        let q_adj = if r <= case_1_bound {
+            // Case 1.
+            q
+        } else {
+            // Case 2.
+            q | 1
+        };
+        q_adj as f128 * pow2(e)
+    }
+}
+
+/// Returns `ceil((a << exp) / b)`.
+///
+/// Requires `2^112 <= a, b < 2^113`.
+fn div_ceil(exp: i32, a: u128, b: u128) -> f128 {
+    if exp < 0 {
+        1.0
+    } else if exp <= 15 {
+        // aa < (2^113 << 15) = 2^128
+        let aa = a << exp;
+        // q < 2^128 / 2^112 + 1 = 2^16 + 1
+        let q = ((aa - 1) / b) as u32 + 1;
+        // We have to use `as` because `From<u32> for f128` is not yet implemented.
+        q as f128
+    } else if exp <= 127 {
+        // aa = a << exp
+        // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
+        let aa = U256::shl_u128(a, exp as u32);
+        // q <= (2^240 - 2^127) / 2^112 + 1 = 2^128 - 2^15 + 1
+        let (q, _) = (aa - U256::ONE).div_rem(b);
+        (q + 1) as f128
+    } else {
+        // aa >= (2^112 << 127) = 2^239
+        // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
+        let aa = U256::shl_u128(a, 127);
+        // e > 0
+        // The result is ceil((aa << e) / b).
+        let e = (exp - 127) as u32;
+
+        // aa = q * b + r
+        // q >= 2^239 / 2^112 = 2^126
+        // q <= (2^240 - 2^127) / 2^112 = 2^128 - 2^15
+        // 0 <= r < b
+        let (q, r) = aa.div_rem(b);
+
+        // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
+        // 0 <= (r << e) / b < 2^e
+        //
+        // There are three cases:
+        // 1. ceil((r << e) / b) = 0
+        // 2. 0 < ceil((r << e) / b) < 2^e
+        // 3. ceil((r << e) / b) = 2^e
+        //
+        // In case 1:
+        // The result is q << e.
+        //
+        // In case 2:
+        // The result is (q << e) + non-zero low e bits.
+        // This rounds the same way as (q | 1) << e because rounding beyond
+        // the 54 most significant bits of q depends only on whether the low-order
+        // bits are non-zero.
+        //
+        // In case 3:
+        // The result is (q + 1) << e.
+        //
+        // Case 1 happens when r = 0.
+        // Case 3 happens when:
+        // (r << e) / b > (1 << e) - 1
+        // (r << e) > (b << e) - b
+        // ((b - r) << e) <= b - 1
+        // b - r <= (b - 1) >> e
+        // r >= b - ((b - 1) >> e)
+        let case_3_bound = b - if e < 128 { (b - 1) >> e } else { 0 };
+        let q_adj = if r == 0 {
+            // Case 1.
+            q
+        } else if r < case_3_bound {
+            // Case 2.
+            q | 1
+        } else {
+            // Case 3.
+            q + 1
+        };
+        q_adj as f128 * pow2(e)
+    }
+}
+
+/// For finite, non-zero numbers returns (sign, exponent, mantissa).
+///
+/// `x = (-1)^sign * 2^exp * mantissa`
+///
+/// `2^112 <= mantissa < 2^113`
+fn normal_form(x: f128) -> Option<(bool, i32, u128)> {
+    let bits = x.to_bits();
+    let sign = bits >> 127 != 0;
+    let biased_exponent = (bits >> 112 & 0x7fff) as i32;
+    let significand = bits & ((1 << 112) - 1);
+    match biased_exponent {
+        0 if significand == 0 => {
+            // 0.0
+            None
+        }
+        0 => {
+            // Subnormal number: 2^(-16382-112) * significand.
+            // We want mantissa to have exactly 15 leading zeros.
+            let shift = significand.leading_zeros() - 15;
+            Some((sign, -16382 - 112 - shift as i32, significand << shift))
+        }
+        0x7fff => {
+            // Infinity or NaN.
+            None
+        }
+        _ => {
+            // Normal number: 2^(biased_exponent-16383-112) * (2^112 + significand)
+            Some((sign, biased_exponent - 16383 - 112, 1 << 112 | significand))
+        }
+    }
+}
+
+/// Returns `2^exp`.
+fn pow2(exp: u32) -> f128 {
+    if exp <= 16383 { f128::from_bits(u128::from(exp + 16383) << 112) } else { f128::INFINITY }
+}
diff --git a/library/std/src/f128/div_euclid/tests.rs b/library/std/src/f128/div_euclid/tests.rs
new file mode 100644
index 0000000000000..a89004bf10ea8
--- /dev/null
+++ b/library/std/src/f128/div_euclid/tests.rs
@@ -0,0 +1,23 @@
+#![cfg(reliable_f128_math)]
+
+#[test]
+fn test_normal_form() {
+    use crate::f128::div_euclid::normal_form;
+
+    assert_eq!(normal_form(-1.5f128), Some((true, -112, 3 << 111)));
+    assert_eq!(normal_form(f128::MIN_POSITIVE), Some((false, -16494, 1 << 112)));
+    assert_eq!(normal_form(f128::MIN_POSITIVE / 2.0), Some((false, -16495, 1 << 112)));
+    assert_eq!(normal_form(f128::MAX), Some((false, 16271, (1 << 113) - 1)));
+    assert_eq!(normal_form(0.0), None);
+    assert_eq!(normal_form(f128::INFINITY), None);
+    assert_eq!(normal_form(f128::NAN), None);
+}
+
+#[test]
+fn test_pow2() {
+    use crate::f128::div_euclid::pow2;
+
+    assert_eq!(pow2(0), 1.0f128);
+    assert_eq!(pow2(4), 16.0f128);
+    assert_eq!(pow2(16384), f128::INFINITY);
+}
diff --git a/library/std/src/f128/tests.rs b/library/std/src/f128/tests.rs
index cbcf9f96239bb..299d74d19b005 100644
--- a/library/std/src/f128/tests.rs
+++ b/library/std/src/f128/tests.rs
@@ -460,7 +460,122 @@ fn test_mul_add() {
 }
 
 #[test]
-#[cfg(reliable_f16_math)]
+#[cfg(reliable_f128_math)]
+fn test_div_euclid() {
+    use core::cmp::Ordering;
+
+    let nan = f128::NAN;
+    let inf = f128::INFINITY;
+    let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS);
+
+    // Infinity and NaN.
+
+    assert!(nan.div_euclid(5.0).is_nan());
+    assert!(inf.div_euclid(inf).is_nan());
+    assert!(0.0f128.div_euclid(0.0).is_nan());
+
+    assert_eq!(inf.div_euclid(3.0), inf);
+    assert_eq!(inf.div_euclid(0.0), inf);
+    assert_eq!(5.0f128.div_euclid(0.0), inf);
+    assert_eq!((-5.0f128).div_euclid(0.0), -inf);
+
+    // 0 / x
+    assert_eq!(Ordering::Equal, 0.0f128.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(10.0).total_cmp(&-0.0));
+
+    // small / large
+    assert_eq!(Ordering::Equal, 1.0f128.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!((-1.0f128).div_euclid(10.0), -1.0);
+    assert_eq!(Ordering::Equal, 1.0f128.div_euclid(-10.0).total_cmp(&-0.0));
+    assert_eq!((-1.0f128).div_euclid(-10.0), 1.0);
+
+    // Small results.
+    assert_eq!(20.0f128.div_euclid(10.0), 2.0);
+    assert_eq!((-20.0f128).div_euclid(10.0), -2.0);
+    assert_eq!(20.0f128.div_euclid(-10.0), -2.0);
+    assert_eq!((-20.0f128).div_euclid(-10.0), 2.0);
+
+    assert_eq!(23.0f128.div_euclid(10.0), 2.0);
+    assert_eq!((-23.0f128).div_euclid(10.0), -3.0);
+    assert_eq!(23.0f128.div_euclid(-10.0), -2.0);
+    assert_eq!((-23.0f128).div_euclid(-10.0), 3.0);
+
+    assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0);
+
+    // Medium results.
+    assert_eq!(1e9f128.div_euclid(10.0), 1e8);
+    assert_eq!((-1e9f128).div_euclid(10.0), -1e8);
+    assert_eq!(1e9f128.div_euclid(-10.0), -1e8);
+    assert_eq!((-1e9f128).div_euclid(-10.0), 1e8);
+
+    assert_eq!(1_000_000_003.0f128.div_euclid(10.0), 100_000_000.0);
+    assert_eq!((-1_000_000_003.0f128).div_euclid(10.0), -100_000_001.0);
+    assert_eq!(1_000_000_003.0f128.div_euclid(-10.0), -100_000_000.0);
+    assert_eq!((-1_000_000_003.0f128).div_euclid(-10.0), 100_000_001.0);
+
+    // Infinite results.
+    assert_eq!(1e4000f128.div_euclid(1e-4000), inf);
+    assert_eq!((-1e4000f128).div_euclid(1e-4000), -inf);
+    assert_eq!(1e4000f128.div_euclid(-1e-4000), -inf);
+    assert_eq!((-1e4000f128).div_euclid(-1e-4000), inf);
+
+    // Large results, exactly divisible.
+    assert_eq!(1e4000f128.div_euclid(2.0), 0.5e4000);
+    assert_eq!((-1e4000f128).div_euclid(2.0), -0.5e4000);
+    assert_eq!(1e4000f128.div_euclid(-2.0), -0.5e4000);
+    assert_eq!((-1e4000f128).div_euclid(-2.0), 0.5e4000);
+
+    // Large results, not divisible.
+    let a = 4281743078117879643174857908348485409148239872f128; // 3 * 2^150
+    let b = 7f128;
+    // a / b = 0x1b6db6db6db6db6db6db6db6db6db6db6db6db.6d...
+    //   = 611677582588268520453551129764069344164034267.42...
+    // floor(a / b) = 0x1b6db6db6db6db6db6db6db6db6db6db6db6db
+    // ceil(a / b) = 0x1b6db6db6db6db6db6db6db6db6db6db6db6dc
+    // Both round to 0x1b6db6db6db6db6db6db6db6db6db000000000
+    //   = 611677582588268520453551129764069314712829952
+    assert_eq!(a.div_euclid(b), 611677582588268520453551129764069314712829952f128);
+    assert_eq!((-a).div_euclid(-b), 611677582588268520453551129764069314712829952f128);
+
+    // Large results, test precise rounding edge cases.
+
+    // a = 0x3abab02be95086e38544e68b3c78a0000000000000000000000000000000000
+    // b = 0x10d3b8a2cc1076df4f04ec48a0a4e
+    let a = 1660249666442741415340463590408472308581124489457651197969883177321645473792f128;
+    let b = 5460685408006286130247220436011598f128;
+
+    // a / b = 0x37d7bea0c08a6e5c00f664cda8aa1000000.be...
+    //       = 304036863945418882481381401039785785556992.74...
+    // floor(a / b) = 0x37d7bea0c08a6e5c00f664cda8aa1000000
+    // Rounds down to 0x37d7bea0c08a6e5c00f664cda8aa0000000
+    //   = 304036863945418882481381401039785785556992
+    assert_eq!(a.div_euclid(b), 304036863945418882481381401039785785556992f128);
+
+    // ceil(a / b) = 0x37d7bea0c08a6e5c00f664cda8aa1000001
+    // Rounds up to 0x37d7bea0c08a6e5c00f664cda8aa2000000
+    //   = 304036863945418882481381401039785802334208
+    assert_eq!((-a).div_euclid(-b), 304036863945418882481381401039785802334208f128);
+
+    // a = 0x36625c03bcfd7d822e780b258e6ec0000000000000000000000000000000000
+    // b = 0x166d7391fbd4d59a891d674eb11f3
+    let a = 1537417493580712628196032571713889096389680551356671222166298628797042262016f128;
+    let b = 7278154372064079194208203952558579f128;
+
+    // a / b = 0x26cc5433ff626fe2bbc14362e154affffff.5f...
+    //       = 211237274587334998320332278178386764890111.37...
+    // floor(a / b) = 0x26cc5433ff626fe2bbc14362e154affffff
+    // Rounds down to 0x26cc5433ff626fe2bbc14362e154a000000
+    //   = 211237274587334998320332278178386748112896
+    assert_eq!(a.div_euclid(b), 211237274587334998320332278178386748112896f128);
+
+    // ceil(a / b) = 0x26cc5433ff626fe2bbc14362e154b000000
+    // Rounds up to 0x26cc5433ff626fe2bbc14362e154c000000
+    //   = 211237274587334998320332278178386781667328
+    assert_eq!((-a).div_euclid(-b), 211237274587334998320332278178386781667328f128);
+}
+
+#[test]
+#[cfg(reliable_f128_math)]
 fn test_recip() {
     let nan: f128 = f128::NAN;
     let inf: f128 = f128::INFINITY;
diff --git a/library/std/src/f128/u256.rs b/library/std/src/f128/u256.rs
new file mode 100644
index 0000000000000..5c812437acbbe
--- /dev/null
+++ b/library/std/src/f128/u256.rs
@@ -0,0 +1,63 @@
+//! Very limited 256-bit arithmetic.
+
+#[cfg(test)]
+mod tests;
+
+use core::ops::{Sub, SubAssign};
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub(crate) struct U256 {
+    high: u128,
+    low: u128,
+}
+
+impl U256 {
+    pub(crate) const ONE: Self = U256 { high: 0, low: 1 };
+
+    /// `x << shift`.
+    ///
+    /// Requires `0 <= shift < 128`.
+    pub(crate) fn shl_u128(x: u128, shift: u32) -> Self {
+        Self { low: x << shift, high: if shift != 0 { x >> (128 - shift) } else { 0 } }
+    }
+
+    /// The number of leading 0 bits.
+    pub(crate) fn leading_zeros(self) -> u32 {
+        if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() }
+    }
+
+    /// Returns (quotient, remainder).
+    ///
+    /// Requires `self < (rhs << 128)`
+    pub(crate) fn div_rem(mut self, rhs: u128) -> (u128, u128) {
+        let rhs_len = 128 - rhs.leading_zeros();
+        let self_len = 256 - self.leading_zeros();
+        // self < (rhs << shift_limit)
+        let shift_limit = (self_len + 1).saturating_sub(rhs_len).min(128);
+        let mut q = 0;
+        for shift in (0..shift_limit).rev() {
+            let rhs_shifted = Self::shl_u128(rhs, shift);
+            if self >= rhs_shifted {
+                q |= 1 << shift;
+                self -= rhs_shifted;
+            }
+        }
+        (q, self.low)
+    }
+}
+
+impl Sub for U256 {
+    type Output = Self;
+
+    fn sub(self, rhs: U256) -> Self {
+        let (low, borrow) = self.low.overflowing_sub(rhs.low);
+        let (high, _) = self.high.borrowing_sub(rhs.high, borrow);
+        Self { low, high }
+    }
+}
+
+impl SubAssign for U256 {
+    fn sub_assign(&mut self, rhs: U256) {
+        *self = *self - rhs;
+    }
+}
diff --git a/library/std/src/f128/u256/tests.rs b/library/std/src/f128/u256/tests.rs
new file mode 100644
index 0000000000000..15ed4981eb9b4
--- /dev/null
+++ b/library/std/src/f128/u256/tests.rs
@@ -0,0 +1,39 @@
+use crate::f128::u256::U256;
+
+#[test]
+fn test_shl_u128() {
+    let a = 0x0123456789abcdef0123456789abcdef;
+
+    let b = U256::shl_u128(a, 0);
+    assert_eq!(b.high, 0);
+    assert_eq!(b.low, a);
+
+    let b = U256::shl_u128(a, 40);
+    assert_eq!(b.high, 0x0123456789);
+    assert_eq!(b.low, 0xabcdef0123456789abcdef0000000000);
+}
+
+#[test]
+fn test_leading_zeros() {
+    assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256);
+    assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116);
+    assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252);
+}
+
+#[test]
+fn test_sub() {
+    let a = U256 { high: 7, low: 10 };
+    let b = U256 { high: 3, low: 2 };
+    let c = U256 { high: 3, low: 15 };
+    assert_eq!(a - b, U256 { high: 4, low: 8 });
+    assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 });
+}
+
+#[test]
+fn test_div_rem() {
+    let a = U256 { high: 0, low: 7 };
+    let b = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef };
+    let c = 0x9876543210abc;
+    assert_eq!(a.div_rem(c), (0, 7));
+    assert_eq!(b.div_rem(c), (0x1207a42fc2c725f9cf302ff31d, 0x7c11e593922a3));
+}
diff --git a/library/std/src/f16.rs b/library/std/src/f16.rs
index 5b7fcaa28e064..062de8d6ef622 100644
--- a/library/std/src/f16.rs
+++ b/library/std/src/f16.rs
@@ -4,6 +4,8 @@
 //!
 //! Mathematically significant numbers are provided in the `consts` sub-module.
 
+mod div_euclid;
+
 #[cfg(test)]
 mod tests;
 
@@ -235,10 +237,14 @@ impl f16 {
 
     /// Calculates Euclidean division, the matching method for `rem_euclid`.
     ///
-    /// This computes the integer `n` such that
+    /// In infinite precision this computes the integer `n` such that
     /// `self = n * rhs + self.rem_euclid(rhs)`.
-    /// In other words, the result is `self / rhs` rounded to the integer `n`
-    /// such that `self >= n * rhs`.
+    /// In other words, the result is `self / rhs` rounded to the
+    /// integer `n` such that `self >= n * rhs`.
+    ///
+    /// However, due to a floating point round-off error the result can be rounded if
+    /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented
+    /// exactly.
     ///
     /// # Precision
     ///
@@ -264,29 +270,23 @@ impl f16 {
     #[unstable(feature = "f16", issue = "116909")]
     #[must_use = "method returns a new number and does not mutate the original value"]
     pub fn div_euclid(self, rhs: f16) -> f16 {
-        let q = (self / rhs).trunc();
-        if self % rhs < 0.0 {
-            return if rhs > 0.0 { q - 1.0 } else { q + 1.0 };
-        }
-        q
+        div_euclid::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
     ///
-    /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in
-    /// most cases. However, due to a floating point round-off error it can
-    /// result in `r == rhs.abs()`, violating the mathematical definition, if
-    /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
-    /// This result is not an element of the function's codomain, but it is the
-    /// closest floating point number in the real numbers and thus fulfills the
-    /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)`
-    /// approximately.
+    /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`.
+    ///
+    /// However, due to a floating point round-off error the result can round to
+    /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
     ///
     /// # Precision
     ///
     /// The result of this operation is guaranteed to be the rounded
     /// infinite-precision result.
     ///
+    /// The result is precise when `self >= 0.0`.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/library/std/src/f16/div_euclid.rs b/library/std/src/f16/div_euclid.rs
new file mode 100644
index 0000000000000..d1872f00f191c
--- /dev/null
+++ b/library/std/src/f16/div_euclid.rs
@@ -0,0 +1,111 @@
+#[cfg(test)]
+mod tests;
+
+/// Software implementation of `f16::div_euclid`.
+#[allow(dead_code)] // f16::div_euclid is cfg(not(test))
+pub(crate) fn div_euclid(a: f16, b: f16) -> f16 {
+    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
+        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
+    {
+        let exp = a_exp - b_exp;
+        match (a_neg, b_neg) {
+            (false, false) => div_floor(exp, a_m, b_m),
+            (true, false) => -div_ceil(exp, a_m, b_m),
+            (false, true) => -div_floor(exp, a_m, b_m),
+            (true, true) => div_ceil(exp, a_m, b_m),
+        }
+    } else {
+        // `a` or `b` are +-0.0 or infinity or NaN.
+        // `a / b` is also +-0.0 or infinity or NaN.
+        // There is no need to round to an integer.
+        a / b
+    }
+}
+
+/// Returns `floor((a << exp) / b)`.
+///
+/// Requires `2^10 <= a, b < 2^11`.
+fn div_floor(exp: i16, a: u16, b: u16) -> f16 {
+    if exp < 0 {
+        0.0
+    } else if exp <= 5 {
+        // aa < (2^11 << 5) = 2^16
+        let aa = a << exp;
+        // q < 2^16 / 2^10 = 2^6
+        let q = (aa / b) as u8;
+        // We have to use `as` because `From<u8> for f16` is not yet implemented.
+        q as f16
+    } else if exp <= 16 {
+        // aa < (2^11 << 16) = 2^27
+        let aa = u32::from(a) << exp;
+        let bb = u32::from(b);
+        // q < 2^27 / 2^10 = 2^17
+        let q = aa / bb;
+        q as f16
+    } else {
+        // exp >= 17
+        // result >= (2^10 << 17) / 2^11 = 2^16
+        // Exponent 16 is too large to represent in f16.
+        f16::INFINITY
+    }
+}
+
+/// Returns `ceil((a << exp) / b)`.
+///
+/// Requires `2^10 <= a, b < 2^11`.
+fn div_ceil(exp: i16, a: u16, b: u16) -> f16 {
+    if exp < 0 {
+        1.0
+    } else if exp <= 5 {
+        // aa < (2^11 << 5) = 2^16
+        let aa = a << exp;
+        // q < 2^16 / 2^10 + 1 = 2^6 + 1
+        let q = ((aa - 1) / b) as u8 + 1;
+        // We have to use `as` because `From<u8> for f16` is not yet implemented.
+        q as f16
+    } else if exp <= 16 {
+        // aa <= ((2^11 - 1) << 16) = 2^27 - 2^16
+        let aa = u32::from(a) << exp;
+        let bb = u32::from(b);
+        // q <= (2^27 - 2^16) / 2^10 + 1 = 2^17 - 2^6 + 1
+        let q = ((aa - 1) / bb) as u16 + 1;
+        q as f16
+    } else {
+        // exp >= 17
+        // result >= (2^10 << 17) / 2^11 = 2^16
+        // Exponent 16 is too large to represent in f16.
+        f16::INFINITY
+    }
+}
+
+/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`.
+///
+/// `x = (-1)^sign * 2^exp * mantissa`
+///
+/// `2^10 <= mantissa < 2^11`
+fn normal_form(x: f16) -> Option<(bool, i16, u16)> {
+    let bits = x.to_bits();
+    let sign = bits >> 15 != 0;
+    let biased_exponent = (bits >> 10 & 0x1f) as i16;
+    let significand = bits & 0x3ff;
+    match biased_exponent {
+        0 if significand == 0 => {
+            // 0.0
+            None
+        }
+        0 => {
+            // Subnormal number: 2^(-14-10) * significand.
+            // We want mantissa to have exactly 5 leading zeros.
+            let shift = significand.leading_zeros() - 5;
+            Some((sign, -14 - 10 - shift as i16, significand << shift))
+        }
+        0x1f => {
+            // Infinity or NaN.
+            None
+        }
+        _ => {
+            // Normal number: 2^(biased_exponent-15-10) * (2^10 + significand)
+            Some((sign, biased_exponent - 15 - 10, 1 << 10 | significand))
+        }
+    }
+}
diff --git a/library/std/src/f16/div_euclid/tests.rs b/library/std/src/f16/div_euclid/tests.rs
new file mode 100644
index 0000000000000..e428f16c98d66
--- /dev/null
+++ b/library/std/src/f16/div_euclid/tests.rs
@@ -0,0 +1,13 @@
+#[cfg(reliable_f16_math)]
+#[test]
+fn test_normal_form() {
+    use crate::f16::div_euclid::normal_form;
+
+    assert_eq!(normal_form(-1.5f16), Some((true, -10, 0x600)));
+    assert_eq!(normal_form(f16::MIN_POSITIVE), Some((false, -24, 0x400)));
+    assert_eq!(normal_form(f16::MIN_POSITIVE / 2.0), Some((false, -25, 0x400)));
+    assert_eq!(normal_form(f16::MAX), Some((false, 5, 0x7ff)));
+    assert_eq!(normal_form(0.0), None);
+    assert_eq!(normal_form(f16::INFINITY), None);
+    assert_eq!(normal_form(f16::NAN), None);
+}
diff --git a/library/std/src/f16/tests.rs b/library/std/src/f16/tests.rs
index 684ee3f3855b8..4fc5f5d3dec09 100644
--- a/library/std/src/f16/tests.rs
+++ b/library/std/src/f16/tests.rs
@@ -442,6 +442,67 @@ fn test_mul_add() {
     assert_eq!((-3.2f16).mul_add(2.4, neg_inf), neg_inf);
 }
 
+#[test]
+#[cfg(reliable_f16_math)]
+fn test_div_euclid() {
+    use core::cmp::Ordering;
+
+    let nan = f16::NAN;
+    let inf = f16::INFINITY;
+    let largest_subnormal = f16::from_bits(LARGEST_SUBNORMAL_BITS);
+
+    // Infinity and NaN.
+    assert!(nan.div_euclid(5.0).is_nan());
+    assert!(inf.div_euclid(inf).is_nan());
+    assert!(0.0f16.div_euclid(0.0).is_nan());
+
+    assert_eq!(inf.div_euclid(3.0), inf);
+    assert_eq!(inf.div_euclid(0.0), inf);
+    assert_eq!(5.0f16.div_euclid(0.0), inf);
+    assert_eq!((-5.0f16).div_euclid(0.0), -inf);
+
+    // 0 / x
+    assert_eq!(Ordering::Equal, 0.0f16.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(10.0).total_cmp(&-0.0));
+
+    // small / large
+    assert_eq!(Ordering::Equal, 1.0f16.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!((-1.0f16).div_euclid(10.0), -1.0);
+    assert_eq!(Ordering::Equal, 1.0f16.div_euclid(-10.0).total_cmp(&-0.0));
+    assert_eq!((-1.0f16).div_euclid(-10.0), 1.0);
+
+    // Small results.
+    assert_eq!(20.0f16.div_euclid(10.0), 2.0);
+    assert_eq!((-20.0f16).div_euclid(10.0), -2.0);
+    assert_eq!(20.0f16.div_euclid(-10.0), -2.0);
+    assert_eq!((-20.0f16).div_euclid(-10.0), 2.0);
+
+    assert_eq!(23.0f16.div_euclid(10.0), 2.0);
+    assert_eq!((-23.0f16).div_euclid(10.0), -3.0);
+    assert_eq!(23.0f16.div_euclid(-10.0), -2.0);
+    assert_eq!((-23.0f16).div_euclid(-10.0), 3.0);
+
+    assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0);
+
+    // Large results, exactly divisible.
+    assert_eq!(1e4f16.div_euclid(2.0), 0.5e4);
+    assert_eq!((-1e4f16).div_euclid(2.0), -0.5e4);
+    assert_eq!(1e4f16.div_euclid(-2.0), -0.5e4);
+    assert_eq!((-1e4f16).div_euclid(-2.0), 0.5e4);
+
+    // Large results, not divisible.
+    assert_eq!(1003.0f16.div_euclid(10.0), 100.0);
+    assert_eq!((-1003.0f16).div_euclid(10.0), -101.0);
+    assert_eq!(1003.0f16.div_euclid(-10.0), -100.0);
+    assert_eq!((-1003.0f16).div_euclid(-10.0), 101.0);
+
+    // Infinite results.
+    assert_eq!(1000f16.div_euclid(0.001f16), inf);
+    assert_eq!((-1000f16).div_euclid(0.001f16), -inf);
+    assert_eq!(1000f16.div_euclid(-0.001f16), -inf);
+    assert_eq!((-1000f16).div_euclid(-0.001f16), inf);
+}
+
 #[test]
 #[cfg(reliable_f16_math)]
 fn test_recip() {
diff --git a/library/std/src/f32.rs b/library/std/src/f32.rs
index 7cb285bbff5f7..516f6ef0fac3b 100644
--- a/library/std/src/f32.rs
+++ b/library/std/src/f32.rs
@@ -12,6 +12,8 @@
 #![stable(feature = "rust1", since = "1.0.0")]
 #![allow(missing_docs)]
 
+mod div_euclid;
+
 #[cfg(test)]
 mod tests;
 
@@ -219,10 +221,14 @@ impl f32 {
 
     /// Calculates Euclidean division, the matching method for `rem_euclid`.
     ///
-    /// This computes the integer `n` such that
+    /// In infinite precision this computes the integer `n` such that
     /// `self = n * rhs + self.rem_euclid(rhs)`.
-    /// In other words, the result is `self / rhs` rounded to the integer `n`
-    /// such that `self >= n * rhs`.
+    /// In other words, the result is `self / rhs` rounded to the
+    /// integer `n` such that `self >= n * rhs`.
+    ///
+    /// However, due to a floating point round-off error the result can be rounded if
+    /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented
+    /// exactly.
     ///
     /// # Precision
     ///
@@ -244,29 +250,23 @@ impl f32 {
     #[inline]
     #[stable(feature = "euclidean_division", since = "1.38.0")]
     pub fn div_euclid(self, rhs: f32) -> f32 {
-        let q = (self / rhs).trunc();
-        if self % rhs < 0.0 {
-            return if rhs > 0.0 { q - 1.0 } else { q + 1.0 };
-        }
-        q
+        div_euclid::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
     ///
-    /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in
-    /// most cases. However, due to a floating point round-off error it can
-    /// result in `r == rhs.abs()`, violating the mathematical definition, if
-    /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
-    /// This result is not an element of the function's codomain, but it is the
-    /// closest floating point number in the real numbers and thus fulfills the
-    /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)`
-    /// approximately.
+    /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`.
+    ///
+    /// However, due to a floating point round-off error the result can round to
+    /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
     ///
     /// # Precision
     ///
     /// The result of this operation is guaranteed to be the rounded
     /// infinite-precision result.
     ///
+    /// The result is precise when `self >= 0.0`.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/library/std/src/f32/div_euclid.rs b/library/std/src/f32/div_euclid.rs
new file mode 100644
index 0000000000000..6040b82d26cb9
--- /dev/null
+++ b/library/std/src/f32/div_euclid.rs
@@ -0,0 +1,204 @@
+#[cfg(test)]
+mod tests;
+
+/// Software implementation of `f32::div_euclid`.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: f32, b: f32) -> f32 {
+    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
+        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
+    {
+        let exp = a_exp - b_exp;
+        match (a_neg, b_neg) {
+            (false, false) => div_floor(exp, a_m, b_m),
+            (true, false) => -div_ceil(exp, a_m, b_m),
+            (false, true) => -div_floor(exp, a_m, b_m),
+            (true, true) => div_ceil(exp, a_m, b_m),
+        }
+    } else {
+        // `a` or `b` are +-0.0 or infinity or NaN.
+        // `a / b` is also +-0.0 or infinity or NaN.
+        // There is no need to round to an integer.
+        a / b
+    }
+}
+
+/// Returns `floor((a << exp) / b)`.
+///
+/// Requires `2^23 <= a, b < 2^24`
+fn div_floor(exp: i32, a: u32, b: u32) -> f32 {
+    if exp < 0 {
+        0.0
+    } else if exp <= 8 {
+        // aa < (2^24 << 8) = 2^32
+        let aa = a << exp;
+        // q < 2^32 / 2^23 = 2^9
+        let q = (aa / b) as u16;
+        q.into()
+    } else if exp <= 31 {
+        // aa < (2^24 << 31) = 2^55
+        let aa = u64::from(a) << exp;
+        let bb = u64::from(b);
+        // q < 2^55 / 2^23 = 2^32
+        let q = (aa / bb) as u32;
+        q as f32
+    } else {
+        // aa >= (2^23 << 31) = 2^54
+        // aa < (2^24 << 31) = 2^55
+        let aa = u64::from(a) << 31;
+        let bb = u64::from(b);
+        // e > 0
+        // The result is floor((aa << e) / b).
+        let e = (exp - 31) as u32;
+
+        // aa = q * b + r
+        // q >= 2^54 / 2^24 = 2^30
+        // q < 2^55 / 2^23 = 2^32
+        let q = (aa / bb) as u32;
+        // 0 <= r < b
+        let r = (aa % bb) as u32;
+
+        // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
+        // 0 <= (r << e) / b < 2^e
+        //
+        // There are two cases:
+        // 1. floor((r << e) / b) = 0
+        // 2. 0 < floor((r << e) / b) < 2^e
+        //
+        // In case 1:
+        // The result is q << e.
+        //
+        // In case 2:
+        // The result is (q << e) + non-zero low e bits.
+        // This rounds the same way as (q | 1) << e because rounding beyond
+        // the 25 most significant bits of q depends only on whether the low-order
+        // bits are non-zero.
+        //
+        // Case 1 happens when:
+        // (r << e) / b < 1
+        // (r << e) <= b - 1
+        // r <= ((b - 1) >> e)
+        let case_1_bound = if e < 32 { (b - 1) >> e } else { 0 };
+        let q_adj = if r <= case_1_bound {
+            // Case 1.
+            q
+        } else {
+            // Case 2.
+            q | 1
+        };
+        q_adj as f32 * pow2(e)
+    }
+}
+
+/// Returns ceil((a << exp) / b).
+///
+/// Requires `2^23 <= a, b < 2^24`
+fn div_ceil(exp: i32, a: u32, b: u32) -> f32 {
+    if exp < 0 {
+        1.0
+    } else if exp <= 8 {
+        // aa < (2^24 << 8) = 2^32
+        let aa = a << exp;
+        // q < 2^32 / 2^23 + 1 = 2^9 + 1
+        let q = ((aa - 1) / b) as u16 + 1;
+        q.into()
+    } else if exp <= 31 {
+        // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31
+        let aa = u64::from(a) << exp;
+        let bb = u64::from(b);
+        // q <= (2^55 - 2^31) / 2^23 + 1 = 2^32 - 2^8 + 1
+        let q = ((aa - 1) / bb) as u32 + 1;
+        q as f32
+    } else {
+        // aa >= (2^23 << 31) = 2^54
+        // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31
+        let aa = u64::from(a) << 31;
+        let bb = u64::from(b);
+        // e > 0
+        // The result is ceil((aa << e) / b).
+        let e = (exp - 31) as u32;
+
+        // aa = q * b + r
+        // q >= 2^54 / 2^24 = 2^30
+        // q <= (2^55 - 2^31) / 2^23 = 2^32 - 2^8
+        let q = (aa / bb) as u32;
+        // 0 <= r < b
+        let r = (aa % bb) as u32;
+
+        // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
+        // 0 <= (r << e) / b < 2^e
+        //
+        // There are three cases:
+        // 1. ceil((r << e) / b) = 0
+        // 2. 0 < ceil((r << e) / b) < 2^e
+        // 3. ceil((r << e) / b) = 2^e
+        //
+        // In case 1:
+        // The result is q << e.
+        //
+        // In case 2:
+        // The result is (q << e) + non-zero low e bits.
+        // This rounds the same way as (q | 1) << e because rounding beyond
+        // the 25 most significant bits of q depends only on whether the low-order
+        // bits are non-zero.
+        //
+        // In case 3:
+        // The result is (q + 1) << e.
+        //
+        // Case 1 happens when r = 0.
+        // Case 3 happens when:
+        // (r << e) / b > (1 << e) - 1
+        // (r << e) > (b << e) - b
+        // ((b - r) << e) <= b - 1
+        // b - r <= (b - 1) >> e
+        // r >= b - ((b - 1) >> e)
+        let case_3_bound = b - if e < 32 { (b - 1) >> e } else { 0 };
+        let q_adj = if r == 0 {
+            // Case 1.
+            q
+        } else if r < case_3_bound {
+            // Case 2.
+            q | 1
+        } else {
+            // Case 3.
+            q + 1
+        };
+        q_adj as f32 * pow2(e)
+    }
+}
+
+/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`.
+///
+/// `x = (-1)^sign * 2^exp * mantissa`
+///
+/// `2^23 <= mantissa < 2^24`
+fn normal_form(x: f32) -> Option<(bool, i32, u32)> {
+    let bits = x.to_bits();
+    let sign = bits >> 31 != 0;
+    let biased_exponent = (bits >> 23 & 0xff) as i32;
+    let significand = bits & 0x7fffff;
+    match biased_exponent {
+        0 if significand == 0 => {
+            // 0.0
+            None
+        }
+        0 => {
+            // Subnormal number: 2^(-126-23) * significand.
+            // We want mantissa to have exactly 8 leading zeros.
+            let shift = significand.leading_zeros() - 8;
+            Some((sign, -126 - 23 - shift as i32, significand << shift))
+        }
+        0xff => {
+            // Infinity or NaN.
+            None
+        }
+        _ => {
+            // Normal number: 2^(biased_exponent-127-23) * (2^23 + significand)
+            Some((sign, biased_exponent - 127 - 23, 1 << 23 | significand))
+        }
+    }
+}
+
+/// Returns `2^exp`.
+fn pow2(exp: u32) -> f32 {
+    if exp <= 127 { f32::from_bits((exp + 127) << 23) } else { f32::INFINITY }
+}
diff --git a/library/std/src/f32/div_euclid/tests.rs b/library/std/src/f32/div_euclid/tests.rs
new file mode 100644
index 0000000000000..e1c139f7e16ed
--- /dev/null
+++ b/library/std/src/f32/div_euclid/tests.rs
@@ -0,0 +1,21 @@
+#[test]
+fn test_normal_form() {
+    use crate::f32::div_euclid::normal_form;
+
+    assert_eq!(normal_form(-1.5f32), Some((true, -23, 0xc00000)));
+    assert_eq!(normal_form(f32::MIN_POSITIVE), Some((false, -149, 0x800000)));
+    assert_eq!(normal_form(f32::MIN_POSITIVE / 2.0), Some((false, -150, 0x800000)));
+    assert_eq!(normal_form(f32::MAX), Some((false, 104, 0xffffff)));
+    assert_eq!(normal_form(0.0), None);
+    assert_eq!(normal_form(f32::INFINITY), None);
+    assert_eq!(normal_form(f32::NAN), None);
+}
+
+#[test]
+fn test_pow2() {
+    use crate::f32::div_euclid::pow2;
+
+    assert_eq!(pow2(0), 1.0f32);
+    assert_eq!(pow2(4), 16.0f32);
+    assert_eq!(pow2(128), f32::INFINITY);
+}
diff --git a/library/std/src/f32/tests.rs b/library/std/src/f32/tests.rs
index 99cfcfb231dad..895719432000e 100644
--- a/library/std/src/f32/tests.rs
+++ b/library/std/src/f32/tests.rs
@@ -423,6 +423,111 @@ fn test_mul_add() {
     assert_eq!((-3.2f32).mul_add(2.4, neg_inf), neg_inf);
 }
 
+#[test]
+fn test_div_euclid() {
+    use core::cmp::Ordering;
+
+    let nan = f32::NAN;
+    let inf = f32::INFINITY;
+    let largest_subnormal = f32::from_bits(LARGEST_SUBNORMAL_BITS);
+
+    // Infinity and NaN.
+    assert!(nan.div_euclid(5.0).is_nan());
+    assert!(inf.div_euclid(inf).is_nan());
+    assert!(0.0f32.div_euclid(0.0).is_nan());
+
+    assert_eq!(inf.div_euclid(3.0), inf);
+    assert_eq!(inf.div_euclid(0.0), inf);
+    assert_eq!(5.0f32.div_euclid(0.0), inf);
+    assert_eq!((-5.0f32).div_euclid(0.0), -inf);
+
+    // 0 / x
+    assert_eq!(Ordering::Equal, 0.0f32.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(10.0).total_cmp(&-0.0));
+
+    // small / large
+    assert_eq!(Ordering::Equal, 1.0f32.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!((-1.0f32).div_euclid(10.0), -1.0);
+    assert_eq!(Ordering::Equal, 1.0f32.div_euclid(-10.0).total_cmp(&-0.0));
+    assert_eq!((-1.0f32).div_euclid(-10.0), 1.0);
+
+    // Small results.
+    assert_eq!(20.0f32.div_euclid(10.0), 2.0);
+    assert_eq!((-20.0f32).div_euclid(10.0), -2.0);
+    assert_eq!(20.0f32.div_euclid(-10.0), -2.0);
+    assert_eq!((-20.0f32).div_euclid(-10.0), 2.0);
+
+    assert_eq!(23.0f32.div_euclid(10.0), 2.0);
+    assert_eq!((-23.0f32).div_euclid(10.0), -3.0);
+    assert_eq!(23.0f32.div_euclid(-10.0), -2.0);
+    assert_eq!((-23.0f32).div_euclid(-10.0), 3.0);
+
+    assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0);
+
+    // Medium results.
+    assert_eq!(1000000.0f32.div_euclid(10.0), 100000.0);
+    assert_eq!((-1000000.0f32).div_euclid(10.0), -100000.0);
+    assert_eq!(1000000.0f32.div_euclid(-10.0), -100000.0);
+    assert_eq!((-1000000.0f32).div_euclid(-10.0), 100000.0);
+
+    assert_eq!(1000003.0f32.div_euclid(10.0), 100000.0);
+    assert_eq!((-1000003.0f32).div_euclid(10.0), -100001.0);
+    assert_eq!(1000003.0f32.div_euclid(-10.0), -100000.0);
+    assert_eq!((-1000003.0f32).div_euclid(-10.0), 100001.0);
+
+    // Infinite results.
+    assert_eq!(1e30f32.div_euclid(1e-30), inf);
+    assert_eq!((-1e30f32).div_euclid(1e-30), -inf);
+    assert_eq!(1e30f32.div_euclid(-1e-30), -inf);
+    assert_eq!((-1e30f32).div_euclid(-1e-30), inf);
+
+    // Large results, exactly divisible.
+    assert_eq!(1e30f32.div_euclid(2.0), 0.5e30);
+    assert_eq!((-1e30f32).div_euclid(2.0), -0.5e30);
+    assert_eq!(1e30f32.div_euclid(-2.0), -0.5e30);
+    assert_eq!((-1e30f32).div_euclid(-2.0), 0.5e30);
+
+    // Large results, not divisible.
+    let a = 3377699720527872f32; // 3 * 2^50
+    let b = 7f32;
+    // a / b = 0x1b6db6db6db6d.b6... = 482528531503981.71...
+    // floor(a / b) = 0x1b6db6db6db6d
+    // ceil(a / b) = 0x1b6db6db6db6e
+    // Both round to 0x1b6db6e000000 = 482528536297472
+    assert_eq!(a.div_euclid(b), 482528536297472f32);
+    assert_eq!((-a).div_euclid(-b), 482528536297472f32);
+
+    // Large results, test precise rounding edge cases.
+
+    // a = 0x7dccd9000000000 = 566553671500824576
+    // b = 0xed5c1c = 5555612
+    let a = 566553671500824576f32;
+    let b = 15555612f32;
+
+    // a / b = 0x87adf0800.c9... = 36421175296.78...
+    // floor(a / b) = 0x87adf0800
+    // Rounds down to 0x87adf0000 = 36421173248
+    assert_eq!(a.div_euclid(b), 36421173248f32);
+
+    // ceil(a / b) = 0x87adf0801
+    // Rounds up to 0x87adf1000 = 36421177344
+    assert_eq!((-a).div_euclid(-b), 36421177344f32);
+
+    // a = 0x7a83d5800000000 = 551758402519302144
+    // b = 0x868b0f = 8817423
+    let a = 551758402519302144f32;
+    let b = 8817423f32;
+
+    // a / b = 0xe91d0d7ff.03... = 62575925247.01...
+    // floor(a / b) = 0xe91d0d7ff
+    // Rounds down to 0xe91d0d000 = 62575923200
+    assert_eq!(a.div_euclid(b), 62575923200f32);
+
+    // ceil(a / b) = 0xe91d0d800
+    // Rounds up to 0xe91d0e000 = 62575927296
+    assert_eq!((-a).div_euclid(-b), 62575927296f32);
+}
+
 #[test]
 fn test_recip() {
     let nan: f32 = f32::NAN;
diff --git a/library/std/src/f64.rs b/library/std/src/f64.rs
index 47163c272de32..fb17857ec6d94 100644
--- a/library/std/src/f64.rs
+++ b/library/std/src/f64.rs
@@ -12,6 +12,8 @@
 #![stable(feature = "rust1", since = "1.0.0")]
 #![allow(missing_docs)]
 
+mod div_euclid;
+
 #[cfg(test)]
 mod tests;
 
@@ -219,10 +221,14 @@ impl f64 {
 
     /// Calculates Euclidean division, the matching method for `rem_euclid`.
     ///
-    /// This computes the integer `n` such that
+    /// In infinite precision this computes the integer `n` such that
     /// `self = n * rhs + self.rem_euclid(rhs)`.
-    /// In other words, the result is `self / rhs` rounded to the integer `n`
-    /// such that `self >= n * rhs`.
+    /// In other words, the result is `self / rhs` rounded to the
+    /// integer `n` such that `self >= n * rhs`.
+    ///
+    /// However, due to a floating point round-off error the result can be rounded if
+    /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented
+    /// exactly.
     ///
     /// # Precision
     ///
@@ -244,29 +250,23 @@ impl f64 {
     #[inline]
     #[stable(feature = "euclidean_division", since = "1.38.0")]
     pub fn div_euclid(self, rhs: f64) -> f64 {
-        let q = (self / rhs).trunc();
-        if self % rhs < 0.0 {
-            return if rhs > 0.0 { q - 1.0 } else { q + 1.0 };
-        }
-        q
+        div_euclid::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
     ///
-    /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in
-    /// most cases. However, due to a floating point round-off error it can
-    /// result in `r == rhs.abs()`, violating the mathematical definition, if
-    /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
-    /// This result is not an element of the function's codomain, but it is the
-    /// closest floating point number in the real numbers and thus fulfills the
-    /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)`
-    /// approximately.
+    /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`.
+    ///
+    /// However, due to a floating point round-off error the result can round to
+    /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
     ///
     /// # Precision
     ///
     /// The result of this operation is guaranteed to be the rounded
     /// infinite-precision result.
     ///
+    /// The result is precise when `self >= 0.0`.
+    ///
     /// # Examples
     ///
     /// ```
diff --git a/library/std/src/f64/div_euclid.rs b/library/std/src/f64/div_euclid.rs
new file mode 100644
index 0000000000000..f4346dd43afff
--- /dev/null
+++ b/library/std/src/f64/div_euclid.rs
@@ -0,0 +1,204 @@
+#[cfg(test)]
+mod tests;
+
+/// Software implementation of `f64::div_euclid`.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: f64, b: f64) -> f64 {
+    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
+        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
+    {
+        let exp = a_exp - b_exp;
+        match (a_neg, b_neg) {
+            (false, false) => div_floor(exp, a_m, b_m),
+            (true, false) => -div_ceil(exp, a_m, b_m),
+            (false, true) => -div_floor(exp, a_m, b_m),
+            (true, true) => div_ceil(exp, a_m, b_m),
+        }
+    } else {
+        // `a` or `b` are +-0.0 or infinity or NaN.
+        // `a / b` is also +-0.0 or infinity or NaN.
+        // There is no need to round to an integer.
+        a / b
+    }
+}
+
+/// Returns `floor((a << exp) / b)`.
+///
+/// Requires `2^52 <= a, b < 2^53`.
+fn div_floor(exp: i32, a: u64, b: u64) -> f64 {
+    if exp < 0 {
+        0.0
+    } else if exp <= 11 {
+        // aa < (2^53 << 11) = 2^64
+        let aa = a << exp;
+        // q < 2^64 / 2^52 = 2^12
+        let q = (aa / b) as u32;
+        q.into()
+    } else if exp <= 63 {
+        // aa < (2^53 << 63) = 2^116
+        let aa = u128::from(a) << exp;
+        let bb = u128::from(b);
+        // q < 2^116 / 2^52 = 2^64
+        let q = (aa / bb) as u64;
+        q as f64
+    } else {
+        // aa >= (2^52 << 63) = 2^115
+        // aa < (2^53 << 63) = 2^116
+        let aa = u128::from(a) << 63;
+        let bb = u128::from(b);
+        // e > 0
+        // The result is floor((aa << e) / b).
+        let e = (exp - 63) as u32;
+
+        // aa = q * b + r
+        // q >= 2^115 / 2^53 = 2^62
+        // q < 2^116 / 2^52 = 2^64
+        let q = (aa / bb) as u64;
+        // 0 <= r < b
+        let r = (aa % bb) as u64;
+
+        // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
+        // 0 <= (r << e) / b < 2^e
+        //
+        // There are two cases:
+        // 1. floor((r << e) / b) = 0
+        // 2. 0 < floor((r << e) / b) < 2^e
+        //
+        // In case 1:
+        // The result is q << e.
+        //
+        // In case 2:
+        // The result is (q << e) + non-zero low e bits.
+        // This rounds the same way as (q | 1) << e because rounding beyond
+        // the 25 most significant bits of q depends only on whether the low-order
+        // bits are non-zero.
+        //
+        // Case 1 happens when:
+        // (r << e) / b < 1
+        // (r << e) <= b - 1
+        // r <= ((b - 1) >> e)
+        let case_1_bound = if e < 64 { (b - 1) >> e } else { 0 };
+        let q_adj = if r <= case_1_bound {
+            // Case 1.
+            q
+        } else {
+            // Case 2.
+            q | 1
+        };
+        q_adj as f64 * pow2(e)
+    }
+}
+
+/// Returns `ceil((a << exp) / b)`.
+///
+/// Requires `2^52 <= a, b < 2^53`.
+fn div_ceil(exp: i32, a: u64, b: u64) -> f64 {
+    if exp < 0 {
+        1.0
+    } else if exp <= 11 {
+        // aa < (2^53 << 11) = 2^64
+        let aa = a << exp;
+        // q < 2^64 / 2^52 + 1 = 2^12 + 1
+        let q = ((aa - 1) / b) as u32 + 1;
+        q.into()
+    } else if exp <= 63 {
+        // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63
+        let aa = u128::from(a) << exp;
+        let bb = u128::from(b);
+        // q <= (2^116 - 2^63) / 2^52 + 1 = 2^64 - 2^11 + 1
+        let q = ((aa - 1) / bb) as u64 + 1;
+        q as f64
+    } else {
+        // aa >= (2^52 << 63) = 2^115
+        // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63
+        let aa = u128::from(a) << 63;
+        let bb = u128::from(b);
+        // e > 0
+        // The result is ceil((aa << e) / b).
+        let e = (exp - 63) as u32;
+
+        // aa = q * b + r
+        // q >= 2^115 / 2^53 = 2^62
+        // q <= (2^116 - 2^63) / 2^52 = 2^64 - 2^11
+        let q = (aa / bb) as u64;
+        // 0 <= r < b
+        let r = (aa % bb) as u64;
+
+        // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
+        // 0 <= (r << e) / b < 2^e
+        //
+        // There are three cases:
+        // 1. ceil((r << e) / b) = 0
+        // 2. 0 < ceil((r << e) / b) < 2^e
+        // 3. ceil((r << e) / b) = 2^e
+        //
+        // In case 1:
+        // The result is q << e.
+        //
+        // In case 2:
+        // The result is (q << e) + non-zero low e bits.
+        // This rounds the same way as (q | 1) << e because rounding beyond
+        // the 54 most significant bits of q depends only on whether the low-order
+        // bits are non-zero.
+        //
+        // In case 3:
+        // The result is (q + 1) << e.
+        //
+        // Case 1 happens when r = 0.
+        // Case 3 happens when:
+        // (r << e) / b > (1 << e) - 1
+        // (r << e) > (b << e) - b
+        // ((b - r) << e) <= b - 1
+        // b - r <= (b - 1) >> e
+        // r >= b - ((b - 1) >> e)
+        let case_3_bound = b - if e < 64 { (b - 1) >> e } else { 0 };
+        let q_adj = if r == 0 {
+            // Case 1.
+            q
+        } else if r < case_3_bound {
+            // Case 2.
+            q | 1
+        } else {
+            // Case 3.
+            q + 1
+        };
+        q_adj as f64 * pow2(e)
+    }
+}
+
+/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`.
+///
+/// `x = (-1)^sign * 2^exp * mantissa`
+///
+/// `2^52 <= mantissa < 2^53`
+fn normal_form(x: f64) -> Option<(bool, i32, u64)> {
+    let bits = x.to_bits();
+    let sign = bits >> 63 != 0;
+    let biased_exponent = (bits >> 52 & 0x7ff) as i32;
+    let significand = bits & ((1 << 52) - 1);
+    match biased_exponent {
+        0 if significand == 0 => {
+            // 0.0
+            None
+        }
+        0 => {
+            // Subnormal number: 2^(-1022-52) * significand.
+            // We want mantissa to have exactly 11 leading zeros.
+            let shift = significand.leading_zeros() - 11;
+            Some((sign, -1022 - 52 - shift as i32, significand << shift))
+        }
+        0x7ff => {
+            // Infinity or NaN.
+            None
+        }
+        _ => {
+            // Normal number: 2^(biased_exponent-1023-52) * (2^52 + significand)
+            Some((sign, biased_exponent - 1023 - 52, 1 << 52 | significand))
+        }
+    }
+}
+
+/// Returns `2^exp`.
+fn pow2(exp: u32) -> f64 {
+    if exp <= 1023 { f64::from_bits(u64::from(exp + 1023) << 52) } else { f64::INFINITY }
+}
diff --git a/library/std/src/f64/div_euclid/tests.rs b/library/std/src/f64/div_euclid/tests.rs
new file mode 100644
index 0000000000000..eeb43a6b7d868
--- /dev/null
+++ b/library/std/src/f64/div_euclid/tests.rs
@@ -0,0 +1,21 @@
+#[test]
+fn test_normal_form() {
+    use crate::f64::div_euclid::normal_form;
+
+    assert_eq!(normal_form(-1.5f64), Some((true, -52, 3 << 51)));
+    assert_eq!(normal_form(f64::MIN_POSITIVE), Some((false, -1074, 1 << 52)));
+    assert_eq!(normal_form(f64::MIN_POSITIVE / 2.0), Some((false, -1075, 1 << 52)));
+    assert_eq!(normal_form(f64::MAX), Some((false, 971, (1 << 53) - 1)));
+    assert_eq!(normal_form(0.0), None);
+    assert_eq!(normal_form(f64::INFINITY), None);
+    assert_eq!(normal_form(f64::NAN), None);
+}
+
+#[test]
+fn test_pow2() {
+    use crate::f64::div_euclid::pow2;
+
+    assert_eq!(pow2(0), 1.0f64);
+    assert_eq!(pow2(4), 16.0f64);
+    assert_eq!(pow2(1024), f64::INFINITY);
+}
diff --git a/library/std/src/f64/tests.rs b/library/std/src/f64/tests.rs
index 3fac2efe0d76c..0499cc4516b4e 100644
--- a/library/std/src/f64/tests.rs
+++ b/library/std/src/f64/tests.rs
@@ -411,6 +411,122 @@ fn test_mul_add() {
     assert_eq!((-3.2f64).mul_add(2.4, neg_inf), neg_inf);
 }
 
+#[test]
+fn test_div_euclid() {
+    use core::cmp::Ordering;
+
+    let nan = f64::NAN;
+    let inf = f64::INFINITY;
+    let largest_subnormal = f64::from_bits(LARGEST_SUBNORMAL_BITS);
+
+    // Infinity and NaN.
+    assert!(nan.div_euclid(5.0).is_nan());
+    assert!(inf.div_euclid(inf).is_nan());
+    assert!(0.0f64.div_euclid(0.0).is_nan());
+
+    assert_eq!(inf.div_euclid(3.0), inf);
+    assert_eq!(inf.div_euclid(0.0), inf);
+    assert_eq!(5.0f64.div_euclid(0.0), inf);
+    assert_eq!((-5.0f64).div_euclid(0.0), -inf);
+
+    // 0 / x
+    assert_eq!(Ordering::Equal, 0.0f64.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(10.0).total_cmp(&-0.0));
+
+    // small / large
+    assert_eq!(Ordering::Equal, 1.0f64.div_euclid(10.0).total_cmp(&0.0));
+    assert_eq!((-1.0f64).div_euclid(10.0), -1.0);
+    assert_eq!(Ordering::Equal, 1.0f64.div_euclid(-10.0).total_cmp(&-0.0));
+    assert_eq!((-1.0f64).div_euclid(-10.0), 1.0);
+
+    // Small results.
+    assert_eq!(20.0f64.div_euclid(10.0), 2.0);
+    assert_eq!((-20.0f64).div_euclid(10.0), -2.0);
+    assert_eq!(20.0f64.div_euclid(-10.0), -2.0);
+    assert_eq!((-20.0f64).div_euclid(-10.0), 2.0);
+
+    assert_eq!(23.0f64.div_euclid(10.0), 2.0);
+    assert_eq!((-23.0f64).div_euclid(10.0), -3.0);
+    assert_eq!(23.0f64.div_euclid(-10.0), -2.0);
+    assert_eq!((-23.0f64).div_euclid(-10.0), 3.0);
+
+    assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0);
+
+    // Issue #107904
+    let a = -0.7500000000000006f64;
+    let b = 0.0833333333333334f64;
+    assert_eq!(a.div_euclid(b), -9.0);
+
+    let a = 46.11764705882354f64;
+    let b = 1.6470588235294124f64;
+    assert_eq!(a.div_euclid(b), 27.0);
+
+    // Medium results.
+    assert_eq!(1000000.0f64.div_euclid(10.0), 100000.0);
+    assert_eq!((-1000000.0f64).div_euclid(10.0), -100000.0);
+    assert_eq!(1000000.0f64.div_euclid(-10.0), -100000.0);
+    assert_eq!((-1000000.0f64).div_euclid(-10.0), 100000.0);
+
+    assert_eq!(1000003.0f64.div_euclid(10.0), 100000.0);
+    assert_eq!((-1000003.0f64).div_euclid(10.0), -100001.0);
+    assert_eq!(1000003.0f64.div_euclid(-10.0), -100000.0);
+    assert_eq!((-1000003.0f64).div_euclid(-10.0), 100001.0);
+
+    // Large results, exactly divisible.
+    assert_eq!(1e300f64.div_euclid(2.0), 0.5e300);
+    assert_eq!((-1e300f64).div_euclid(2.0), -0.5e300);
+    assert_eq!(1e300f64.div_euclid(-2.0), -0.5e300);
+    assert_eq!((-1e300f64).div_euclid(-2.0), 0.5e300);
+
+    // Infinite results.
+    assert_eq!(1e300f64.div_euclid(1e-300), inf);
+    assert_eq!((-1e300f64).div_euclid(1e-300), -inf);
+    assert_eq!(1e300f64.div_euclid(-1e-300), -inf);
+    assert_eq!((-1e300f64).div_euclid(-1e-300), inf);
+
+    // Large results, not divisible.
+    let a = 3802951800684688204490109616128f64; // 3 * 2^100
+    let b = 7f64;
+    // a / b = 0x6db6db6db6db6db6db6db6db6.db...
+    //   = 543278828669241172070015659446.85...
+    // floor(a / b) = 0x6db6db6db6db6db6db6db6db6
+    // ceil(a / b) = 0x6db6db6db6db6db6db6db6db7
+    // Both round to 0x6db6db6db6db6c00000000000
+    //   = 543278828669241141911982440448
+    assert_eq!(a.div_euclid(b), 543278828669241141911982440448f64);
+    assert_eq!((-a).div_euclid(-b), 543278828669241141911982440448f64);
+
+    // Large results, test precise rounding edge cases.
+
+    // a = 0xc3b75b9c77bfa80000000000000000 = 1016216826558977217020954221979107328
+    // b = 0x130594ed7aaacb = 5354161755040459
+    let a = 1016216826558977217020954221979107328f64;
+    let b = 5354161755040459f64;
+
+    // a / b = 0xa49ff0421f8f94000.0a... = 189799425764882989056.03...
+    // floor(a / b) = 0xa49ff0421f8f94000
+    // Rounds down to 0xa49ff0421f8f90000 = 189799425764882972672
+    assert_eq!(a.div_euclid(b), 189799425764882972672f64);
+
+    // ceil(a / b) = 0xa49ff0421f8f94001
+    // Rounds up to 0xa49ff0421f8f98000 = 189799425764883005440
+    assert_eq!((-a).div_euclid(-b), 189799425764883005440f64);
+
+    // a = 0xee67640bd3b6880000000000000000 = 1237863666996996959508242859395383296
+    // b = 0x15409d10e91300 = 5982017848677120
+    // a / b = 0xb37bdd747bac8bfff.5b... = 206930787956565786623.35...
+    let a = 1237863666996996959508242859395383296f64;
+    let b = 5982017848677120f64;
+
+    // floor(a / b) = 0xb37bdd747bac8bfff
+    // Rounds down to 0xb37bdd747bac88000 = 206930787956565770240
+    assert_eq!(a.div_euclid(b), 206930787956565770240f64);
+
+    // ceil(a / b) = 0xb37bdd747bac8c000
+    // Rounds up to 0xb37bdd747bac90000 =
+    assert_eq!((-a).div_euclid(-b), 206930787956565803008f64);
+}
+
 #[test]
 fn test_recip() {
     let nan: f64 = f64::NAN;
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 49a0322003905..dbbfed619559d 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -321,6 +321,7 @@
 // Library features (core):
 // tidy-alphabetical-start
 #![feature(array_chunks)]
+#![feature(bigint_helper_methods)]
 #![feature(c_str_module)]
 #![feature(char_internals)]
 #![feature(clone_to_uninit)]

From 4534005bc0bf868371c86fcdfb3b53f2549101f4 Mon Sep 17 00:00:00 2001
From: Tomek Czajka <tczajka@gmail.com>
Date: Wed, 11 Dec 2024 09:43:08 +0100
Subject: [PATCH 2/5] Fix overflow bug in f16::div_ceil

---
 library/std/src/f16/div_euclid.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/std/src/f16/div_euclid.rs b/library/std/src/f16/div_euclid.rs
index d1872f00f191c..df770c3af1c25 100644
--- a/library/std/src/f16/div_euclid.rs
+++ b/library/std/src/f16/div_euclid.rs
@@ -68,7 +68,7 @@ fn div_ceil(exp: i16, a: u16, b: u16) -> f16 {
         let aa = u32::from(a) << exp;
         let bb = u32::from(b);
         // q <= (2^27 - 2^16) / 2^10 + 1 = 2^17 - 2^6 + 1
-        let q = ((aa - 1) / bb) as u16 + 1;
+        let q = (aa - 1) / bb + 1;
         q as f16
     } else {
         // exp >= 17

From cedc71861d2259aa62e46d9135e6b2dc7fd10fb2 Mon Sep 17 00:00:00 2001
From: Tomek Czajka <tczajka@gmail.com>
Date: Wed, 11 Dec 2024 18:44:16 +0100
Subject: [PATCH 3/5] Software `div_euclid` refactoring.

`f{16,32,64,128}/soft.rs` are now very similar. f32 and f64 almost
identical.

`U256` helper moved to `crate::u256`.

`(-5f32).div_euclid(f32::INFINITY)` now returns -1 not 0.
---
 library/std/src/f128.rs                  |   5 +-
 library/std/src/f128/div_euclid.rs       | 204 -------------
 library/std/src/f128/div_euclid/tests.rs |  23 --
 library/std/src/f128/soft.rs             | 350 +++++++++++++++++++++++
 library/std/src/f128/soft/tests.rs       |  49 ++++
 library/std/src/f128/tests.rs            |  12 +-
 library/std/src/f128/u256.rs             |  63 ----
 library/std/src/f128/u256/tests.rs       |  39 ---
 library/std/src/f16.rs                   |   4 +-
 library/std/src/f16/div_euclid.rs        | 111 -------
 library/std/src/f16/div_euclid/tests.rs  |  13 -
 library/std/src/f16/soft.rs              | 213 ++++++++++++++
 library/std/src/f16/soft/tests.rs        |  39 +++
 library/std/src/f16/tests.rs             |  11 +-
 library/std/src/f32.rs                   |   4 +-
 library/std/src/f32/div_euclid.rs        | 204 -------------
 library/std/src/f32/div_euclid/tests.rs  |  21 --
 library/std/src/f32/soft.rs              | 344 ++++++++++++++++++++++
 library/std/src/f32/soft/tests.rs        |  49 ++++
 library/std/src/f32/tests.rs             |  11 +
 library/std/src/f64.rs                   |   4 +-
 library/std/src/f64/div_euclid.rs        | 204 -------------
 library/std/src/f64/div_euclid/tests.rs  |  21 --
 library/std/src/f64/soft.rs              | 344 ++++++++++++++++++++++
 library/std/src/f64/soft/tests.rs        |  49 ++++
 library/std/src/f64/tests.rs             |  10 +
 library/std/src/lib.rs                   |   1 +
 library/std/src/u256.rs                  | 117 ++++++++
 library/std/src/u256/tests.rs            |  66 +++++
 29 files changed, 1670 insertions(+), 915 deletions(-)
 delete mode 100644 library/std/src/f128/div_euclid.rs
 delete mode 100644 library/std/src/f128/div_euclid/tests.rs
 create mode 100644 library/std/src/f128/soft.rs
 create mode 100644 library/std/src/f128/soft/tests.rs
 delete mode 100644 library/std/src/f128/u256.rs
 delete mode 100644 library/std/src/f128/u256/tests.rs
 delete mode 100644 library/std/src/f16/div_euclid.rs
 delete mode 100644 library/std/src/f16/div_euclid/tests.rs
 create mode 100644 library/std/src/f16/soft.rs
 create mode 100644 library/std/src/f16/soft/tests.rs
 delete mode 100644 library/std/src/f32/div_euclid.rs
 delete mode 100644 library/std/src/f32/div_euclid/tests.rs
 create mode 100644 library/std/src/f32/soft.rs
 create mode 100644 library/std/src/f32/soft/tests.rs
 delete mode 100644 library/std/src/f64/div_euclid.rs
 delete mode 100644 library/std/src/f64/div_euclid/tests.rs
 create mode 100644 library/std/src/f64/soft.rs
 create mode 100644 library/std/src/f64/soft/tests.rs
 create mode 100644 library/std/src/u256.rs
 create mode 100644 library/std/src/u256/tests.rs

diff --git a/library/std/src/f128.rs b/library/std/src/f128.rs
index 56f3df1b4aa95..a206ef8fa47c9 100644
--- a/library/std/src/f128.rs
+++ b/library/std/src/f128.rs
@@ -4,8 +4,7 @@
 //!
 //! Mathematically significant numbers are provided in the `consts` sub-module.
 
-mod div_euclid;
-mod u256;
+mod soft;
 
 #[cfg(test)]
 mod tests;
@@ -271,7 +270,7 @@ impl f128 {
     #[unstable(feature = "f128", issue = "116909")]
     #[must_use = "method returns a new number and does not mutate the original value"]
     pub fn div_euclid(self, rhs: f128) -> f128 {
-        div_euclid::div_euclid(self, rhs)
+        soft::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
diff --git a/library/std/src/f128/div_euclid.rs b/library/std/src/f128/div_euclid.rs
deleted file mode 100644
index 38334ac674aaf..0000000000000
--- a/library/std/src/f128/div_euclid.rs
+++ /dev/null
@@ -1,204 +0,0 @@
-#[cfg(test)]
-mod tests;
-
-use crate::f128::u256::U256;
-
-/// Software implementation of `f128::div_euclid`.
-#[allow(dead_code)]
-pub(crate) fn div_euclid(a: f128, b: f128) -> f128 {
-    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
-        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
-    {
-        let exp = a_exp - b_exp;
-        match (a_neg, b_neg) {
-            (false, false) => div_floor(exp, a_m, b_m),
-            (true, false) => -div_ceil(exp, a_m, b_m),
-            (false, true) => -div_floor(exp, a_m, b_m),
-            (true, true) => div_ceil(exp, a_m, b_m),
-        }
-    } else {
-        // `a` or `b` are +-0.0 or infinity or NaN.
-        // `a / b` is also +-0.0 or infinity or NaN.
-        // There is no need to round to an integer.
-        a / b
-    }
-}
-
-/// Returns `floor((a << exp) / b)`.
-///
-/// Requires `2^112 <= a, b < 2^113`.
-fn div_floor(exp: i32, a: u128, b: u128) -> f128 {
-    if exp < 0 {
-        0.0
-    } else if exp <= 15 {
-        // aa < (2^113 << 15) = 2^128
-        let aa = a << exp;
-        // q < 2^128 / 2^112 = 2^16
-        let q = (aa / b) as u32;
-        // We have to use `as` because `From<u32> for f128` is not yet implemented.
-        q as f128
-    } else if exp <= 127 {
-        // aa = a << exp
-        // aa < (2^113 << 127) = 2^240
-        let aa = U256::shl_u128(a, exp as u32);
-        // q < 2^240 / 2^112 = 2^128
-        let (q, _) = aa.div_rem(b);
-        q as f128
-    } else {
-        // aa >= (2^112 << 127) = 2^239
-        // aa < (2^113 << 127) = 2^240
-        let aa = U256::shl_u128(a, 127);
-        // e > 0
-        // The result is floor((aa << e) / b).
-        let e = (exp - 127) as u32;
-
-        // aa = q * b + r
-        // q >= 2^239 / 2^113 = 2^126
-        // q < 2^239 / 2^112 = 2^128
-        // 0 <= r < b
-        let (q, r) = aa.div_rem(b);
-
-        // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
-        // 0 <= (r << e) / b < 2^e
-        //
-        // There are two cases:
-        // 1. floor((r << e) / b) = 0
-        // 2. 0 < floor((r << e) / b) < 2^e
-        //
-        // In case 1:
-        // The result is q << e.
-        //
-        // In case 2:
-        // The result is (q << e) + non-zero low e bits.
-        // This rounds the same way as (q | 1) << e because rounding beyond
-        // the 25 most significant bits of q depends only on whether the low-order
-        // bits are non-zero.
-        //
-        // Case 1 happens when:
-        // (r << e) / b < 1
-        // (r << e) <= b - 1
-        // r <= ((b - 1) >> e)
-        let case_1_bound = if e < 128 { (b - 1) >> e } else { 0 };
-        let q_adj = if r <= case_1_bound {
-            // Case 1.
-            q
-        } else {
-            // Case 2.
-            q | 1
-        };
-        q_adj as f128 * pow2(e)
-    }
-}
-
-/// Returns `ceil((a << exp) / b)`.
-///
-/// Requires `2^112 <= a, b < 2^113`.
-fn div_ceil(exp: i32, a: u128, b: u128) -> f128 {
-    if exp < 0 {
-        1.0
-    } else if exp <= 15 {
-        // aa < (2^113 << 15) = 2^128
-        let aa = a << exp;
-        // q < 2^128 / 2^112 + 1 = 2^16 + 1
-        let q = ((aa - 1) / b) as u32 + 1;
-        // We have to use `as` because `From<u32> for f128` is not yet implemented.
-        q as f128
-    } else if exp <= 127 {
-        // aa = a << exp
-        // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
-        let aa = U256::shl_u128(a, exp as u32);
-        // q <= (2^240 - 2^127) / 2^112 + 1 = 2^128 - 2^15 + 1
-        let (q, _) = (aa - U256::ONE).div_rem(b);
-        (q + 1) as f128
-    } else {
-        // aa >= (2^112 << 127) = 2^239
-        // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
-        let aa = U256::shl_u128(a, 127);
-        // e > 0
-        // The result is ceil((aa << e) / b).
-        let e = (exp - 127) as u32;
-
-        // aa = q * b + r
-        // q >= 2^239 / 2^112 = 2^126
-        // q <= (2^240 - 2^127) / 2^112 = 2^128 - 2^15
-        // 0 <= r < b
-        let (q, r) = aa.div_rem(b);
-
-        // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
-        // 0 <= (r << e) / b < 2^e
-        //
-        // There are three cases:
-        // 1. ceil((r << e) / b) = 0
-        // 2. 0 < ceil((r << e) / b) < 2^e
-        // 3. ceil((r << e) / b) = 2^e
-        //
-        // In case 1:
-        // The result is q << e.
-        //
-        // In case 2:
-        // The result is (q << e) + non-zero low e bits.
-        // This rounds the same way as (q | 1) << e because rounding beyond
-        // the 54 most significant bits of q depends only on whether the low-order
-        // bits are non-zero.
-        //
-        // In case 3:
-        // The result is (q + 1) << e.
-        //
-        // Case 1 happens when r = 0.
-        // Case 3 happens when:
-        // (r << e) / b > (1 << e) - 1
-        // (r << e) > (b << e) - b
-        // ((b - r) << e) <= b - 1
-        // b - r <= (b - 1) >> e
-        // r >= b - ((b - 1) >> e)
-        let case_3_bound = b - if e < 128 { (b - 1) >> e } else { 0 };
-        let q_adj = if r == 0 {
-            // Case 1.
-            q
-        } else if r < case_3_bound {
-            // Case 2.
-            q | 1
-        } else {
-            // Case 3.
-            q + 1
-        };
-        q_adj as f128 * pow2(e)
-    }
-}
-
-/// For finite, non-zero numbers returns (sign, exponent, mantissa).
-///
-/// `x = (-1)^sign * 2^exp * mantissa`
-///
-/// `2^112 <= mantissa < 2^113`
-fn normal_form(x: f128) -> Option<(bool, i32, u128)> {
-    let bits = x.to_bits();
-    let sign = bits >> 127 != 0;
-    let biased_exponent = (bits >> 112 & 0x7fff) as i32;
-    let significand = bits & ((1 << 112) - 1);
-    match biased_exponent {
-        0 if significand == 0 => {
-            // 0.0
-            None
-        }
-        0 => {
-            // Subnormal number: 2^(-16382-112) * significand.
-            // We want mantissa to have exactly 15 leading zeros.
-            let shift = significand.leading_zeros() - 15;
-            Some((sign, -16382 - 112 - shift as i32, significand << shift))
-        }
-        0x7fff => {
-            // Infinity or NaN.
-            None
-        }
-        _ => {
-            // Normal number: 2^(biased_exponent-16383-112) * (2^112 + significand)
-            Some((sign, biased_exponent - 16383 - 112, 1 << 112 | significand))
-        }
-    }
-}
-
-/// Returns `2^exp`.
-fn pow2(exp: u32) -> f128 {
-    if exp <= 16383 { f128::from_bits(u128::from(exp + 16383) << 112) } else { f128::INFINITY }
-}
diff --git a/library/std/src/f128/div_euclid/tests.rs b/library/std/src/f128/div_euclid/tests.rs
deleted file mode 100644
index a89004bf10ea8..0000000000000
--- a/library/std/src/f128/div_euclid/tests.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-#![cfg(reliable_f128_math)]
-
-#[test]
-fn test_normal_form() {
-    use crate::f128::div_euclid::normal_form;
-
-    assert_eq!(normal_form(-1.5f128), Some((true, -112, 3 << 111)));
-    assert_eq!(normal_form(f128::MIN_POSITIVE), Some((false, -16494, 1 << 112)));
-    assert_eq!(normal_form(f128::MIN_POSITIVE / 2.0), Some((false, -16495, 1 << 112)));
-    assert_eq!(normal_form(f128::MAX), Some((false, 16271, (1 << 113) - 1)));
-    assert_eq!(normal_form(0.0), None);
-    assert_eq!(normal_form(f128::INFINITY), None);
-    assert_eq!(normal_form(f128::NAN), None);
-}
-
-#[test]
-fn test_pow2() {
-    use crate::f128::div_euclid::pow2;
-
-    assert_eq!(pow2(0), 1.0f128);
-    assert_eq!(pow2(4), 16.0f128);
-    assert_eq!(pow2(16384), f128::INFINITY);
-}
diff --git a/library/std/src/f128/soft.rs b/library/std/src/f128/soft.rs
new file mode 100644
index 0000000000000..30b6e82e18086
--- /dev/null
+++ b/library/std/src/f128/soft.rs
@@ -0,0 +1,350 @@
+//! Software floating point functions for `f128`.
+
+#[cfg(test)]
+mod tests;
+
+use crate::u256::U256;
+
+/// The floating point type.
+type FP = f128;
+
+/// Type of the exponent.
+type Exponent = i32;
+
+/// Type of `FP::to_bits`.
+type Bits = u128;
+
+/// Half as many bits as `Bits`.
+type HalfBits = u64;
+
+/// Twice as many bits as `Bits`.
+type DoubleBits = U256;
+
+/// Number of bits in the significand.
+const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1;
+
+/// Number of bits in the exponent.
+const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS;
+
+/// Encoded exponent = EXPONENT_BIAS + actual exponent.
+const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP;
+
+/// Represents an `FP` number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct Representation {
+    /// Sign.
+    sign: Sign,
+    /// Absolute value.
+    abs: Positive,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Sign {
+    Positive,
+    Negative,
+}
+
+/// Represents a positive number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Positive {
+    Zero,
+    Finite(PositiveFinite),
+    Infinity,
+    NaN,
+}
+
+/// Represents a non-zero, positive finite number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct PositiveFinite {
+    /// `number = 2^exp * mantissa`.
+    exp: Exponent,
+    /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)`
+    mantissa: Bits,
+}
+
+impl Representation {
+    fn new(x: FP) -> Self {
+        let bits = x.to_bits();
+
+        let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 {
+            Sign::Positive
+        } else {
+            Sign::Negative
+        };
+
+        const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1;
+        const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1;
+
+        let significand = bits & SIGNIFICAND_MASK;
+        let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK;
+
+        let abs = match biased_exponent {
+            0 if significand == 0 => Positive::Zero,
+            0 => {
+                // Subnormal number.
+                // Normalize it by shifting left, so that it has exactly
+                // SIGNIFICAND_BITS + 1 bits.
+                let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros());
+
+                Positive::Finite(PositiveFinite {
+                    exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent,
+                    mantissa: significand << shift,
+                })
+            }
+            EXPONENT_MASK if significand == 0 => Positive::Infinity,
+            EXPONENT_MASK => Positive::NaN,
+            _ => Positive::Finite(PositiveFinite {
+                exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent,
+                mantissa: 1 << SIGNIFICAND_BITS | significand,
+            }),
+        };
+        Representation { sign, abs }
+    }
+}
+
+/// Shift right that works even if `shift >= Bits::BITS`.
+///
+/// Requires `shift >= 0`.
+fn safe_shr(a: Bits, shift: Exponent) -> Bits {
+    if shift < Bits::BITS as Exponent { a >> shift } else { 0 }
+}
+
+/// Returns `2^exp`.
+fn pow2(exp: Exponent) -> FP {
+    let biased_exponent = exp + EXPONENT_BIAS;
+    if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) {
+        // Round to 0.
+        0.0
+    } else if biased_exponent <= 0 {
+        // Subnormal.
+        FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1))
+    } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 {
+        // Normal.
+        FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS)
+    } else {
+        // Round to infinity.
+        FP::INFINITY
+    }
+}
+
+/// Euclidean division.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: FP, b: FP) -> FP {
+    let a = Representation::new(a);
+    let b = Representation::new(b);
+    let res = match a.sign {
+        Sign::Positive => div_floor_pos(a.abs, b.abs),
+        Sign::Negative => -div_ceil_pos(a.abs, b.abs),
+    };
+    match b.sign {
+        Sign::Positive => res,
+        Sign::Negative => -res,
+    }
+}
+
+/// Division rounded down for positive numbers.
+fn div_floor_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b),
+        (Positive::Finite(_), Positive::Infinity) => 0.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded up for positive numbers.
+fn div_ceil_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b),
+        // Tricky case. It's 1.0 rather than 0.0. This way
+        // `div_euclid(-finite, inf) = -1.0` is consistent with
+        // `rem_euclid(-finite, inf) = inf`.
+        (Positive::Finite(_), Positive::Infinity) => 1.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded down for positive finite numbers.
+fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        0.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 <= HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS
+        // SIGNIFICAND_BITS >= HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS);
+        let q = (aa / b.mantissa) as HalfBits;
+        // We have to use `as` because `From<u64> for f128` is not yet implemented.
+        q as f128
+    } else if exp <= (Bits::BITS - 1) as Exponent {
+        let aa = DoubleBits::from(a.mantissa) << exp as u32;
+        let bb = DoubleBits::from(b.mantissa);
+        // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
+        let q: Bits = (aa / bb).wrap_u128();
+        q as FP
+    } else {
+        // exp >= Bits::BITS
+        let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
+        let bb = DoubleBits::from(b.mantissa);
+
+        // e > 0
+        // The result is `floor((aa << e) / bb)`
+        let e = exp - (Bits::BITS - 1) as Exponent;
+
+        // aa = q * bb + r
+        // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
+        // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`.
+        // 0 <= r < b
+        let (q, r) = aa.div_rem(bb);
+        let q: Bits = q.wrap_u128();
+        let r: Bits = r.wrap_u128();
+
+        // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb)
+        // 0 <= (r << e) / bb < 2^e
+        //
+        // There are two cases:
+        // 1. floor((r << e) / bb) = 0
+        // 2. 0 < floor((r << e) / bb) < 2^e
+        //
+        // Case 1:
+        // The result is q << e.
+        //
+        // Case 2:
+        // The result is (q << e) + non-zero low e bits.
+        //
+        // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends
+        // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4,
+        // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits
+        // of q, it doesn't matter *which* bits are non-zero. As long as:
+        //
+        // SIGNIFICAND_BITS <= Bits::BITS - 4
+        //
+        // we can just set bit 0 of q to 1 instead of using extra low-order bits.
+        //
+        // Therefore the result rounds the same way as (q | 1) << e.
+        //
+        // Case 1 happens when:
+        // (r << e) / bb < 1
+        // (r << e) <= bb - 1
+        // r <= ((bb - 1) >> e)
+        let case_1_bound = safe_shr(b.mantissa - 1, e);
+        let q_adj = if r <= case_1_bound {
+            // Case 1.
+            q
+        } else {
+            // Case 2.
+            const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS);
+            q | 1
+        };
+        q_adj as FP * pow2(e)
+    }
+}
+
+/// Division rounded up for positive finite numbers.
+fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        1.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 < HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS
+        // SIGNIFICAND_BITS > HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS);
+        let q = ((aa - 1) / b.mantissa) as HalfBits + 1;
+        // We have to use `as` because `From<u64> for f128` is not yet implemented.
+        q as f128
+    } else if exp <= (Bits::BITS - 1) as Exponent {
+        let aa = DoubleBits::from(a.mantissa) << exp as u32;
+        let bb = DoubleBits::from(b.mantissa);
+        // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1
+        //   <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX
+        let q = ((aa - U256::ONE) / bb).wrap_u128() + 1;
+        q as FP
+    } else {
+        let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
+        let bb = DoubleBits::from(b.mantissa);
+        // e > 0
+        // The result is ceil((aa << e) / b).
+        let e = exp - (Bits::BITS - 1) as Exponent;
+
+        // aa = q * bb + r
+        // `q < Bits::MAX` as in the previous case.
+        // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`.
+        // 0 <= r < b
+        let (q, r) = aa.div_rem(bb);
+        let q: Bits = q.wrap_u128();
+        let r: Bits = r.wrap_u128();
+
+        // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb)
+        // 0 <= (r << e) / bb < 2^e
+        //
+        // There are three cases:
+        // 1. ceil((r << e) / bb) = 0
+        // 2. 0 < ceil((r << e) / bb) < 2^e
+        // 3. ceil((r << e) / bb) = 2^e
+        //
+        // Case 1:
+        // The result is q << e.
+        //
+        // Case 2:
+        // The result is (q << e) + non-zero low e bits.
+        //
+        // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends
+        // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4,
+        // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits
+        // of q, it doesn't matter *which* bits are non-zero. As long as:
+        //
+        // SIGNIFICAND_BITS <= Bits::BITS - 4
+        //
+        // we can just set bit 0 of q to 1 instead of using extra low-order bits.
+        //
+        // Therefore the result rounds the same way as (q | 1) << e.
+        //
+        // In case 3:
+        // The result is (q + 1) << e.
+        //
+        // Case 1 happens when r = 0.
+        // Case 3 happens when:
+        // (r << e) / bb > (1 << e) - 1
+        // (r << e) > (bb << e) - b
+        // ((bb - r) << e) <= bb - 1
+        // bb - r <= (bb - 1) >> e
+        // r >= bb - ((bb - 1) >> e)
+        let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e);
+        let q_adj = if r == 0 {
+            // Case 1.
+            q
+        } else if r < case_3_bound {
+            // Case 2.
+            const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS);
+            q | 1
+        } else {
+            // Case 3.
+            // No overflow because `q < Bits::MAX`.
+            q + 1
+        };
+        q_adj as FP * pow2(e)
+    }
+}
diff --git a/library/std/src/f128/soft/tests.rs b/library/std/src/f128/soft/tests.rs
new file mode 100644
index 0000000000000..2156e3fd8448a
--- /dev/null
+++ b/library/std/src/f128/soft/tests.rs
@@ -0,0 +1,49 @@
+use crate::f128::soft::{Positive, PositiveFinite, Representation, Sign, pow2};
+
+#[test]
+fn test_representation() {
+    assert_eq!(Representation::new(-1.5f128), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Finite(PositiveFinite { exp: -112, mantissa: 3 << 111 }),
+    });
+
+    assert_eq!(Representation::new(f128::MIN_POSITIVE), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -16494, mantissa: 1 << 112 }),
+    });
+
+    assert_eq!(Representation::new(f128::MIN_POSITIVE / 2.0), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -16495, mantissa: 1 << 112 }),
+    });
+
+    assert_eq!(Representation::new(f128::MAX), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: 16271, mantissa: (1 << 113) - 1 }),
+    });
+
+    assert_eq!(Representation::new(-0.0f128), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Zero,
+    });
+
+    assert_eq!(Representation::new(f128::INFINITY), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Infinity,
+    });
+
+    assert_eq!(Representation::new(f128::NAN), Representation {
+        sign: Sign::Positive,
+        abs: Positive::NaN,
+    });
+}
+
+#[test]
+fn test_pow2() {
+    assert_eq!(pow2(2), 4.0);
+    assert_eq!(pow2(-1), 0.5);
+    assert_eq!(pow2(-16495), 0.0);
+    assert_eq!(pow2(-16382), f128::MIN_POSITIVE);
+    assert_eq!(pow2(-16383), f128::MIN_POSITIVE / 2.0);
+    assert_eq!(pow2(16384), f128::INFINITY);
+}
diff --git a/library/std/src/f128/tests.rs b/library/std/src/f128/tests.rs
index 299d74d19b005..667e4dc68bc30 100644
--- a/library/std/src/f128/tests.rs
+++ b/library/std/src/f128/tests.rs
@@ -460,7 +460,6 @@ fn test_mul_add() {
 }
 
 #[test]
-#[cfg(reliable_f128_math)]
 fn test_div_euclid() {
     use core::cmp::Ordering;
 
@@ -469,7 +468,6 @@ fn test_div_euclid() {
     let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS);
 
     // Infinity and NaN.
-
     assert!(nan.div_euclid(5.0).is_nan());
     assert!(inf.div_euclid(inf).is_nan());
     assert!(0.0f128.div_euclid(0.0).is_nan());
@@ -479,6 +477,16 @@ fn test_div_euclid() {
     assert_eq!(5.0f128.div_euclid(0.0), inf);
     assert_eq!((-5.0f128).div_euclid(0.0), -inf);
 
+    // Small / infinity.
+    assert_eq!(Ordering::Equal, 5.0f128.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, 0.0f128.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(inf).total_cmp(&-0.0));
+    assert_eq!((-5.0f128).div_euclid(inf), -1.0);
+    assert_eq!(Ordering::Equal, 5.0f128.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, 0.0f128.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(-inf).total_cmp(&0.0));
+    assert_eq!((-5.0f128).div_euclid(-inf), 1.0);
+
     // 0 / x
     assert_eq!(Ordering::Equal, 0.0f128.div_euclid(10.0).total_cmp(&0.0));
     assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(10.0).total_cmp(&-0.0));
diff --git a/library/std/src/f128/u256.rs b/library/std/src/f128/u256.rs
deleted file mode 100644
index 5c812437acbbe..0000000000000
--- a/library/std/src/f128/u256.rs
+++ /dev/null
@@ -1,63 +0,0 @@
-//! Very limited 256-bit arithmetic.
-
-#[cfg(test)]
-mod tests;
-
-use core::ops::{Sub, SubAssign};
-
-#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
-pub(crate) struct U256 {
-    high: u128,
-    low: u128,
-}
-
-impl U256 {
-    pub(crate) const ONE: Self = U256 { high: 0, low: 1 };
-
-    /// `x << shift`.
-    ///
-    /// Requires `0 <= shift < 128`.
-    pub(crate) fn shl_u128(x: u128, shift: u32) -> Self {
-        Self { low: x << shift, high: if shift != 0 { x >> (128 - shift) } else { 0 } }
-    }
-
-    /// The number of leading 0 bits.
-    pub(crate) fn leading_zeros(self) -> u32 {
-        if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() }
-    }
-
-    /// Returns (quotient, remainder).
-    ///
-    /// Requires `self < (rhs << 128)`
-    pub(crate) fn div_rem(mut self, rhs: u128) -> (u128, u128) {
-        let rhs_len = 128 - rhs.leading_zeros();
-        let self_len = 256 - self.leading_zeros();
-        // self < (rhs << shift_limit)
-        let shift_limit = (self_len + 1).saturating_sub(rhs_len).min(128);
-        let mut q = 0;
-        for shift in (0..shift_limit).rev() {
-            let rhs_shifted = Self::shl_u128(rhs, shift);
-            if self >= rhs_shifted {
-                q |= 1 << shift;
-                self -= rhs_shifted;
-            }
-        }
-        (q, self.low)
-    }
-}
-
-impl Sub for U256 {
-    type Output = Self;
-
-    fn sub(self, rhs: U256) -> Self {
-        let (low, borrow) = self.low.overflowing_sub(rhs.low);
-        let (high, _) = self.high.borrowing_sub(rhs.high, borrow);
-        Self { low, high }
-    }
-}
-
-impl SubAssign for U256 {
-    fn sub_assign(&mut self, rhs: U256) {
-        *self = *self - rhs;
-    }
-}
diff --git a/library/std/src/f128/u256/tests.rs b/library/std/src/f128/u256/tests.rs
deleted file mode 100644
index 15ed4981eb9b4..0000000000000
--- a/library/std/src/f128/u256/tests.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-use crate::f128::u256::U256;
-
-#[test]
-fn test_shl_u128() {
-    let a = 0x0123456789abcdef0123456789abcdef;
-
-    let b = U256::shl_u128(a, 0);
-    assert_eq!(b.high, 0);
-    assert_eq!(b.low, a);
-
-    let b = U256::shl_u128(a, 40);
-    assert_eq!(b.high, 0x0123456789);
-    assert_eq!(b.low, 0xabcdef0123456789abcdef0000000000);
-}
-
-#[test]
-fn test_leading_zeros() {
-    assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256);
-    assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116);
-    assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252);
-}
-
-#[test]
-fn test_sub() {
-    let a = U256 { high: 7, low: 10 };
-    let b = U256 { high: 3, low: 2 };
-    let c = U256 { high: 3, low: 15 };
-    assert_eq!(a - b, U256 { high: 4, low: 8 });
-    assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 });
-}
-
-#[test]
-fn test_div_rem() {
-    let a = U256 { high: 0, low: 7 };
-    let b = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef };
-    let c = 0x9876543210abc;
-    assert_eq!(a.div_rem(c), (0, 7));
-    assert_eq!(b.div_rem(c), (0x1207a42fc2c725f9cf302ff31d, 0x7c11e593922a3));
-}
diff --git a/library/std/src/f16.rs b/library/std/src/f16.rs
index 062de8d6ef622..00095b740ed80 100644
--- a/library/std/src/f16.rs
+++ b/library/std/src/f16.rs
@@ -4,7 +4,7 @@
 //!
 //! Mathematically significant numbers are provided in the `consts` sub-module.
 
-mod div_euclid;
+mod soft;
 
 #[cfg(test)]
 mod tests;
@@ -270,7 +270,7 @@ impl f16 {
     #[unstable(feature = "f16", issue = "116909")]
     #[must_use = "method returns a new number and does not mutate the original value"]
     pub fn div_euclid(self, rhs: f16) -> f16 {
-        div_euclid::div_euclid(self, rhs)
+        soft::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
diff --git a/library/std/src/f16/div_euclid.rs b/library/std/src/f16/div_euclid.rs
deleted file mode 100644
index df770c3af1c25..0000000000000
--- a/library/std/src/f16/div_euclid.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-#[cfg(test)]
-mod tests;
-
-/// Software implementation of `f16::div_euclid`.
-#[allow(dead_code)] // f16::div_euclid is cfg(not(test))
-pub(crate) fn div_euclid(a: f16, b: f16) -> f16 {
-    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
-        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
-    {
-        let exp = a_exp - b_exp;
-        match (a_neg, b_neg) {
-            (false, false) => div_floor(exp, a_m, b_m),
-            (true, false) => -div_ceil(exp, a_m, b_m),
-            (false, true) => -div_floor(exp, a_m, b_m),
-            (true, true) => div_ceil(exp, a_m, b_m),
-        }
-    } else {
-        // `a` or `b` are +-0.0 or infinity or NaN.
-        // `a / b` is also +-0.0 or infinity or NaN.
-        // There is no need to round to an integer.
-        a / b
-    }
-}
-
-/// Returns `floor((a << exp) / b)`.
-///
-/// Requires `2^10 <= a, b < 2^11`.
-fn div_floor(exp: i16, a: u16, b: u16) -> f16 {
-    if exp < 0 {
-        0.0
-    } else if exp <= 5 {
-        // aa < (2^11 << 5) = 2^16
-        let aa = a << exp;
-        // q < 2^16 / 2^10 = 2^6
-        let q = (aa / b) as u8;
-        // We have to use `as` because `From<u8> for f16` is not yet implemented.
-        q as f16
-    } else if exp <= 16 {
-        // aa < (2^11 << 16) = 2^27
-        let aa = u32::from(a) << exp;
-        let bb = u32::from(b);
-        // q < 2^27 / 2^10 = 2^17
-        let q = aa / bb;
-        q as f16
-    } else {
-        // exp >= 17
-        // result >= (2^10 << 17) / 2^11 = 2^16
-        // Exponent 16 is too large to represent in f16.
-        f16::INFINITY
-    }
-}
-
-/// Returns `ceil((a << exp) / b)`.
-///
-/// Requires `2^10 <= a, b < 2^11`.
-fn div_ceil(exp: i16, a: u16, b: u16) -> f16 {
-    if exp < 0 {
-        1.0
-    } else if exp <= 5 {
-        // aa < (2^11 << 5) = 2^16
-        let aa = a << exp;
-        // q < 2^16 / 2^10 + 1 = 2^6 + 1
-        let q = ((aa - 1) / b) as u8 + 1;
-        // We have to use `as` because `From<u8> for f16` is not yet implemented.
-        q as f16
-    } else if exp <= 16 {
-        // aa <= ((2^11 - 1) << 16) = 2^27 - 2^16
-        let aa = u32::from(a) << exp;
-        let bb = u32::from(b);
-        // q <= (2^27 - 2^16) / 2^10 + 1 = 2^17 - 2^6 + 1
-        let q = (aa - 1) / bb + 1;
-        q as f16
-    } else {
-        // exp >= 17
-        // result >= (2^10 << 17) / 2^11 = 2^16
-        // Exponent 16 is too large to represent in f16.
-        f16::INFINITY
-    }
-}
-
-/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`.
-///
-/// `x = (-1)^sign * 2^exp * mantissa`
-///
-/// `2^10 <= mantissa < 2^11`
-fn normal_form(x: f16) -> Option<(bool, i16, u16)> {
-    let bits = x.to_bits();
-    let sign = bits >> 15 != 0;
-    let biased_exponent = (bits >> 10 & 0x1f) as i16;
-    let significand = bits & 0x3ff;
-    match biased_exponent {
-        0 if significand == 0 => {
-            // 0.0
-            None
-        }
-        0 => {
-            // Subnormal number: 2^(-14-10) * significand.
-            // We want mantissa to have exactly 5 leading zeros.
-            let shift = significand.leading_zeros() - 5;
-            Some((sign, -14 - 10 - shift as i16, significand << shift))
-        }
-        0x1f => {
-            // Infinity or NaN.
-            None
-        }
-        _ => {
-            // Normal number: 2^(biased_exponent-15-10) * (2^10 + significand)
-            Some((sign, biased_exponent - 15 - 10, 1 << 10 | significand))
-        }
-    }
-}
diff --git a/library/std/src/f16/div_euclid/tests.rs b/library/std/src/f16/div_euclid/tests.rs
deleted file mode 100644
index e428f16c98d66..0000000000000
--- a/library/std/src/f16/div_euclid/tests.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-#[cfg(reliable_f16_math)]
-#[test]
-fn test_normal_form() {
-    use crate::f16::div_euclid::normal_form;
-
-    assert_eq!(normal_form(-1.5f16), Some((true, -10, 0x600)));
-    assert_eq!(normal_form(f16::MIN_POSITIVE), Some((false, -24, 0x400)));
-    assert_eq!(normal_form(f16::MIN_POSITIVE / 2.0), Some((false, -25, 0x400)));
-    assert_eq!(normal_form(f16::MAX), Some((false, 5, 0x7ff)));
-    assert_eq!(normal_form(0.0), None);
-    assert_eq!(normal_form(f16::INFINITY), None);
-    assert_eq!(normal_form(f16::NAN), None);
-}
diff --git a/library/std/src/f16/soft.rs b/library/std/src/f16/soft.rs
new file mode 100644
index 0000000000000..2ebd1bed1ef9b
--- /dev/null
+++ b/library/std/src/f16/soft.rs
@@ -0,0 +1,213 @@
+//! Software floating point functions for `f16`.
+
+#[cfg(test)]
+mod tests;
+
+/// The floating point type.
+type FP = f16;
+
+/// Type of the exponent.
+type Exponent = i16;
+
+/// Type of `FP::to_bits`.
+type Bits = u16;
+
+/// Half as many bits as `Bits`.
+type HalfBits = u8;
+
+/// Twice as many bits as `Bits`.
+type DoubleBits = u32;
+
+/// Number of bits in the significand.
+const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1;
+
+/// Number of bits in the exponent.
+const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS;
+
+/// Encoded exponent = EXPONENT_BIAS + actual exponent.
+const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP as Exponent;
+
+/// Represents an `FP` number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct Representation {
+    /// Sign.
+    sign: Sign,
+    /// Absolute value.
+    abs: Positive,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Sign {
+    Positive,
+    Negative,
+}
+
+/// Represents a positive number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Positive {
+    Zero,
+    Finite(PositiveFinite),
+    Infinity,
+    NaN,
+}
+
+/// Represents a non-zero, positive finite number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct PositiveFinite {
+    /// `number = 2^exp * mantissa`.
+    exp: Exponent,
+    /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)`
+    mantissa: Bits,
+}
+
+impl Representation {
+    fn new(x: FP) -> Self {
+        let bits = x.to_bits();
+
+        let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 {
+            Sign::Positive
+        } else {
+            Sign::Negative
+        };
+
+        const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1;
+        const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1;
+
+        let significand = bits & SIGNIFICAND_MASK;
+        let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK;
+
+        let abs = match biased_exponent {
+            0 if significand == 0 => Positive::Zero,
+            0 => {
+                // Subnormal number.
+                // Normalize it by shifting left, so that it has exactly
+                // SIGNIFICAND_BITS + 1 bits.
+                let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros());
+
+                Positive::Finite(PositiveFinite {
+                    exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent,
+                    mantissa: significand << shift,
+                })
+            }
+            EXPONENT_MASK if significand == 0 => Positive::Infinity,
+            EXPONENT_MASK => Positive::NaN,
+            _ => Positive::Finite(PositiveFinite {
+                exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent,
+                mantissa: 1 << SIGNIFICAND_BITS | significand,
+            }),
+        };
+        Representation { sign, abs }
+    }
+}
+
+/// Euclidean division.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: FP, b: FP) -> FP {
+    let a = Representation::new(a);
+    let b = Representation::new(b);
+    let res = match a.sign {
+        Sign::Positive => div_floor_pos(a.abs, b.abs),
+        Sign::Negative => -div_ceil_pos(a.abs, b.abs),
+    };
+    match b.sign {
+        Sign::Positive => res,
+        Sign::Negative => -res,
+    }
+}
+
+/// Division rounded down for positive numbers.
+fn div_floor_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b),
+        (Positive::Finite(_), Positive::Infinity) => 0.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded up for positive numbers.
+fn div_ceil_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b),
+        // Tricky case. It's 1.0 rather than 0.0. This way
+        // `div_euclid(-finite, inf) = -1.0` is consistent with
+        // `rem_euclid(-finite, inf) = inf`.
+        (Positive::Finite(_), Positive::Infinity) => 1.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded down for positive finite numbers.
+fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        0.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 <= HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS
+        // SIGNIFICAND_BITS >= HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS);
+        let q = (aa / b.mantissa) as HalfBits;
+        // We have to use `as` because `From<u8> for f16` is not yet implemented.
+        q as f16
+    } else if exp <= FP::MAX_EXP as Exponent {
+        // Verify that `aa` fits in `DoubleBits`.
+        const _: () = assert!(FP::MAX_EXP as u32 <= Bits::BITS);
+        let aa = DoubleBits::from(a.mantissa) << exp;
+        let bb = DoubleBits::from(b.mantissa);
+        let q = aa / bb;
+        q as FP
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
+    }
+}
+
+/// Division rounded up for positive finite numbers.
+fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        1.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 < HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS
+        // SIGNIFICAND_BITS > HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS);
+        let q = ((aa - 1) / b.mantissa) as HalfBits + 1;
+        // We have to use `as` because `From<u8> for f16` is not yet implemented.
+        q as f16
+    } else if exp <= FP::MAX_EXP as Exponent {
+        // Verify that `aa` fits in `DoubleBits`.
+        const _: () = assert!(FP::MAX_EXP as u32 <= Bits::BITS);
+        let aa = DoubleBits::from(a.mantissa) << exp;
+        let bb = DoubleBits::from(b.mantissa);
+        let q = (aa - 1) / bb + 1;
+        q as FP
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
+    }
+}
diff --git a/library/std/src/f16/soft/tests.rs b/library/std/src/f16/soft/tests.rs
new file mode 100644
index 0000000000000..ce277a20771b5
--- /dev/null
+++ b/library/std/src/f16/soft/tests.rs
@@ -0,0 +1,39 @@
+use crate::f16::soft::{Positive, PositiveFinite, Representation, Sign};
+
+#[test]
+fn test_representation() {
+    assert_eq!(Representation::new(-1.5f16), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Finite(PositiveFinite { exp: -10, mantissa: 0x600 }),
+    });
+
+    assert_eq!(Representation::new(f16::MIN_POSITIVE), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -24, mantissa: 0x400 }),
+    });
+
+    assert_eq!(Representation::new(f16::MIN_POSITIVE / 2.0), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -25, mantissa: 0x400 }),
+    });
+
+    assert_eq!(Representation::new(f16::MAX), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: 5, mantissa: 0x7ff }),
+    });
+
+    assert_eq!(Representation::new(-0.0), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Zero,
+    });
+
+    assert_eq!(Representation::new(f16::INFINITY), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Infinity,
+    });
+
+    assert_eq!(Representation::new(f16::NAN), Representation {
+        sign: Sign::Positive,
+        abs: Positive::NaN,
+    });
+}
diff --git a/library/std/src/f16/tests.rs b/library/std/src/f16/tests.rs
index 4fc5f5d3dec09..141f07e54a2cf 100644
--- a/library/std/src/f16/tests.rs
+++ b/library/std/src/f16/tests.rs
@@ -443,7 +443,6 @@ fn test_mul_add() {
 }
 
 #[test]
-#[cfg(reliable_f16_math)]
 fn test_div_euclid() {
     use core::cmp::Ordering;
 
@@ -461,6 +460,16 @@ fn test_div_euclid() {
     assert_eq!(5.0f16.div_euclid(0.0), inf);
     assert_eq!((-5.0f16).div_euclid(0.0), -inf);
 
+    // Small / infinity.
+    assert_eq!(Ordering::Equal, 5.0f16.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, 0.0f16.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(inf).total_cmp(&-0.0));
+    assert_eq!((-5.0f16).div_euclid(inf), -1.0);
+    assert_eq!(Ordering::Equal, 5.0f16.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, 0.0f16.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(-inf).total_cmp(&0.0));
+    assert_eq!((-5.0f16).div_euclid(-inf), 1.0);
+
     // 0 / x
     assert_eq!(Ordering::Equal, 0.0f16.div_euclid(10.0).total_cmp(&0.0));
     assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(10.0).total_cmp(&-0.0));
diff --git a/library/std/src/f32.rs b/library/std/src/f32.rs
index 516f6ef0fac3b..6207177768f90 100644
--- a/library/std/src/f32.rs
+++ b/library/std/src/f32.rs
@@ -12,7 +12,7 @@
 #![stable(feature = "rust1", since = "1.0.0")]
 #![allow(missing_docs)]
 
-mod div_euclid;
+mod soft;
 
 #[cfg(test)]
 mod tests;
@@ -250,7 +250,7 @@ impl f32 {
     #[inline]
     #[stable(feature = "euclidean_division", since = "1.38.0")]
     pub fn div_euclid(self, rhs: f32) -> f32 {
-        div_euclid::div_euclid(self, rhs)
+        soft::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
diff --git a/library/std/src/f32/div_euclid.rs b/library/std/src/f32/div_euclid.rs
deleted file mode 100644
index 6040b82d26cb9..0000000000000
--- a/library/std/src/f32/div_euclid.rs
+++ /dev/null
@@ -1,204 +0,0 @@
-#[cfg(test)]
-mod tests;
-
-/// Software implementation of `f32::div_euclid`.
-#[allow(dead_code)]
-pub(crate) fn div_euclid(a: f32, b: f32) -> f32 {
-    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
-        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
-    {
-        let exp = a_exp - b_exp;
-        match (a_neg, b_neg) {
-            (false, false) => div_floor(exp, a_m, b_m),
-            (true, false) => -div_ceil(exp, a_m, b_m),
-            (false, true) => -div_floor(exp, a_m, b_m),
-            (true, true) => div_ceil(exp, a_m, b_m),
-        }
-    } else {
-        // `a` or `b` are +-0.0 or infinity or NaN.
-        // `a / b` is also +-0.0 or infinity or NaN.
-        // There is no need to round to an integer.
-        a / b
-    }
-}
-
-/// Returns `floor((a << exp) / b)`.
-///
-/// Requires `2^23 <= a, b < 2^24`
-fn div_floor(exp: i32, a: u32, b: u32) -> f32 {
-    if exp < 0 {
-        0.0
-    } else if exp <= 8 {
-        // aa < (2^24 << 8) = 2^32
-        let aa = a << exp;
-        // q < 2^32 / 2^23 = 2^9
-        let q = (aa / b) as u16;
-        q.into()
-    } else if exp <= 31 {
-        // aa < (2^24 << 31) = 2^55
-        let aa = u64::from(a) << exp;
-        let bb = u64::from(b);
-        // q < 2^55 / 2^23 = 2^32
-        let q = (aa / bb) as u32;
-        q as f32
-    } else {
-        // aa >= (2^23 << 31) = 2^54
-        // aa < (2^24 << 31) = 2^55
-        let aa = u64::from(a) << 31;
-        let bb = u64::from(b);
-        // e > 0
-        // The result is floor((aa << e) / b).
-        let e = (exp - 31) as u32;
-
-        // aa = q * b + r
-        // q >= 2^54 / 2^24 = 2^30
-        // q < 2^55 / 2^23 = 2^32
-        let q = (aa / bb) as u32;
-        // 0 <= r < b
-        let r = (aa % bb) as u32;
-
-        // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
-        // 0 <= (r << e) / b < 2^e
-        //
-        // There are two cases:
-        // 1. floor((r << e) / b) = 0
-        // 2. 0 < floor((r << e) / b) < 2^e
-        //
-        // In case 1:
-        // The result is q << e.
-        //
-        // In case 2:
-        // The result is (q << e) + non-zero low e bits.
-        // This rounds the same way as (q | 1) << e because rounding beyond
-        // the 25 most significant bits of q depends only on whether the low-order
-        // bits are non-zero.
-        //
-        // Case 1 happens when:
-        // (r << e) / b < 1
-        // (r << e) <= b - 1
-        // r <= ((b - 1) >> e)
-        let case_1_bound = if e < 32 { (b - 1) >> e } else { 0 };
-        let q_adj = if r <= case_1_bound {
-            // Case 1.
-            q
-        } else {
-            // Case 2.
-            q | 1
-        };
-        q_adj as f32 * pow2(e)
-    }
-}
-
-/// Returns ceil((a << exp) / b).
-///
-/// Requires `2^23 <= a, b < 2^24`
-fn div_ceil(exp: i32, a: u32, b: u32) -> f32 {
-    if exp < 0 {
-        1.0
-    } else if exp <= 8 {
-        // aa < (2^24 << 8) = 2^32
-        let aa = a << exp;
-        // q < 2^32 / 2^23 + 1 = 2^9 + 1
-        let q = ((aa - 1) / b) as u16 + 1;
-        q.into()
-    } else if exp <= 31 {
-        // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31
-        let aa = u64::from(a) << exp;
-        let bb = u64::from(b);
-        // q <= (2^55 - 2^31) / 2^23 + 1 = 2^32 - 2^8 + 1
-        let q = ((aa - 1) / bb) as u32 + 1;
-        q as f32
-    } else {
-        // aa >= (2^23 << 31) = 2^54
-        // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31
-        let aa = u64::from(a) << 31;
-        let bb = u64::from(b);
-        // e > 0
-        // The result is ceil((aa << e) / b).
-        let e = (exp - 31) as u32;
-
-        // aa = q * b + r
-        // q >= 2^54 / 2^24 = 2^30
-        // q <= (2^55 - 2^31) / 2^23 = 2^32 - 2^8
-        let q = (aa / bb) as u32;
-        // 0 <= r < b
-        let r = (aa % bb) as u32;
-
-        // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
-        // 0 <= (r << e) / b < 2^e
-        //
-        // There are three cases:
-        // 1. ceil((r << e) / b) = 0
-        // 2. 0 < ceil((r << e) / b) < 2^e
-        // 3. ceil((r << e) / b) = 2^e
-        //
-        // In case 1:
-        // The result is q << e.
-        //
-        // In case 2:
-        // The result is (q << e) + non-zero low e bits.
-        // This rounds the same way as (q | 1) << e because rounding beyond
-        // the 25 most significant bits of q depends only on whether the low-order
-        // bits are non-zero.
-        //
-        // In case 3:
-        // The result is (q + 1) << e.
-        //
-        // Case 1 happens when r = 0.
-        // Case 3 happens when:
-        // (r << e) / b > (1 << e) - 1
-        // (r << e) > (b << e) - b
-        // ((b - r) << e) <= b - 1
-        // b - r <= (b - 1) >> e
-        // r >= b - ((b - 1) >> e)
-        let case_3_bound = b - if e < 32 { (b - 1) >> e } else { 0 };
-        let q_adj = if r == 0 {
-            // Case 1.
-            q
-        } else if r < case_3_bound {
-            // Case 2.
-            q | 1
-        } else {
-            // Case 3.
-            q + 1
-        };
-        q_adj as f32 * pow2(e)
-    }
-}
-
-/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`.
-///
-/// `x = (-1)^sign * 2^exp * mantissa`
-///
-/// `2^23 <= mantissa < 2^24`
-fn normal_form(x: f32) -> Option<(bool, i32, u32)> {
-    let bits = x.to_bits();
-    let sign = bits >> 31 != 0;
-    let biased_exponent = (bits >> 23 & 0xff) as i32;
-    let significand = bits & 0x7fffff;
-    match biased_exponent {
-        0 if significand == 0 => {
-            // 0.0
-            None
-        }
-        0 => {
-            // Subnormal number: 2^(-126-23) * significand.
-            // We want mantissa to have exactly 8 leading zeros.
-            let shift = significand.leading_zeros() - 8;
-            Some((sign, -126 - 23 - shift as i32, significand << shift))
-        }
-        0xff => {
-            // Infinity or NaN.
-            None
-        }
-        _ => {
-            // Normal number: 2^(biased_exponent-127-23) * (2^23 + significand)
-            Some((sign, biased_exponent - 127 - 23, 1 << 23 | significand))
-        }
-    }
-}
-
-/// Returns `2^exp`.
-fn pow2(exp: u32) -> f32 {
-    if exp <= 127 { f32::from_bits((exp + 127) << 23) } else { f32::INFINITY }
-}
diff --git a/library/std/src/f32/div_euclid/tests.rs b/library/std/src/f32/div_euclid/tests.rs
deleted file mode 100644
index e1c139f7e16ed..0000000000000
--- a/library/std/src/f32/div_euclid/tests.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-#[test]
-fn test_normal_form() {
-    use crate::f32::div_euclid::normal_form;
-
-    assert_eq!(normal_form(-1.5f32), Some((true, -23, 0xc00000)));
-    assert_eq!(normal_form(f32::MIN_POSITIVE), Some((false, -149, 0x800000)));
-    assert_eq!(normal_form(f32::MIN_POSITIVE / 2.0), Some((false, -150, 0x800000)));
-    assert_eq!(normal_form(f32::MAX), Some((false, 104, 0xffffff)));
-    assert_eq!(normal_form(0.0), None);
-    assert_eq!(normal_form(f32::INFINITY), None);
-    assert_eq!(normal_form(f32::NAN), None);
-}
-
-#[test]
-fn test_pow2() {
-    use crate::f32::div_euclid::pow2;
-
-    assert_eq!(pow2(0), 1.0f32);
-    assert_eq!(pow2(4), 16.0f32);
-    assert_eq!(pow2(128), f32::INFINITY);
-}
diff --git a/library/std/src/f32/soft.rs b/library/std/src/f32/soft.rs
new file mode 100644
index 0000000000000..9a280893eccf6
--- /dev/null
+++ b/library/std/src/f32/soft.rs
@@ -0,0 +1,344 @@
+//! Software floating point functions for `f32`.
+
+#[cfg(test)]
+mod tests;
+
+/// The floating point type.
+type FP = f32;
+
+/// Type of the exponent.
+type Exponent = i32;
+
+/// Type of `FP::to_bits`.
+type Bits = u32;
+
+/// Half as many bits as `Bits`.
+type HalfBits = u16;
+
+/// Twice as many bits as `Bits`.
+type DoubleBits = u64;
+
+/// Number of bits in the significand.
+const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1;
+
+/// Number of bits in the exponent.
+const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS;
+
+/// Encoded exponent = EXPONENT_BIAS + actual exponent.
+const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP;
+
+/// Represents an `FP` number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct Representation {
+    /// Sign.
+    sign: Sign,
+    /// Absolute value.
+    abs: Positive,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Sign {
+    Positive,
+    Negative,
+}
+
+/// Represents a positive number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Positive {
+    Zero,
+    Finite(PositiveFinite),
+    Infinity,
+    NaN,
+}
+
+/// Represents a non-zero, positive finite number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct PositiveFinite {
+    /// `number = 2^exp * mantissa`.
+    exp: Exponent,
+    /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)`
+    mantissa: Bits,
+}
+
+impl Representation {
+    fn new(x: FP) -> Self {
+        let bits = x.to_bits();
+
+        let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 {
+            Sign::Positive
+        } else {
+            Sign::Negative
+        };
+
+        const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1;
+        const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1;
+
+        let significand = bits & SIGNIFICAND_MASK;
+        let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK;
+
+        let abs = match biased_exponent {
+            0 if significand == 0 => Positive::Zero,
+            0 => {
+                // Subnormal number.
+                // Normalize it by shifting left, so that it has exactly
+                // SIGNIFICAND_BITS + 1 bits.
+                let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros());
+
+                Positive::Finite(PositiveFinite {
+                    exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent,
+                    mantissa: significand << shift,
+                })
+            }
+            EXPONENT_MASK if significand == 0 => Positive::Infinity,
+            EXPONENT_MASK => Positive::NaN,
+            _ => Positive::Finite(PositiveFinite {
+                exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent,
+                mantissa: 1 << SIGNIFICAND_BITS | significand,
+            }),
+        };
+        Representation { sign, abs }
+    }
+}
+
+/// Shift right that works even if `shift >= Bits::BITS`.
+///
+/// Requires `shift >= 0`.
+fn safe_shr(a: Bits, shift: Exponent) -> Bits {
+    if shift < Bits::BITS as Exponent { a >> shift } else { 0 }
+}
+
+/// Returns `2^exp`.
+fn pow2(exp: Exponent) -> FP {
+    let biased_exponent = exp + EXPONENT_BIAS;
+    if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) {
+        // Round to 0.
+        0.0
+    } else if biased_exponent <= 0 {
+        // Subnormal.
+        FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1))
+    } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 {
+        // Normal.
+        FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS)
+    } else {
+        // Round to infinity.
+        FP::INFINITY
+    }
+}
+
+/// Euclidean division.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: FP, b: FP) -> FP {
+    let a = Representation::new(a);
+    let b = Representation::new(b);
+    let res = match a.sign {
+        Sign::Positive => div_floor_pos(a.abs, b.abs),
+        Sign::Negative => -div_ceil_pos(a.abs, b.abs),
+    };
+    match b.sign {
+        Sign::Positive => res,
+        Sign::Negative => -res,
+    }
+}
+
+/// Division rounded down for positive numbers.
+fn div_floor_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b),
+        (Positive::Finite(_), Positive::Infinity) => 0.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded up for positive numbers.
+fn div_ceil_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b),
+        // Tricky case. It's 1.0 rather than 0.0. This way
+        // `div_euclid(-finite, inf) = -1.0` is consistent with
+        // `rem_euclid(-finite, inf) = inf`.
+        (Positive::Finite(_), Positive::Infinity) => 1.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded down for positive finite numbers.
+fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        0.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 <= HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS
+        // SIGNIFICAND_BITS >= HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS);
+        let q = (aa / b.mantissa) as HalfBits;
+        q.into()
+    } else if exp <= (Bits::BITS - 1) as Exponent {
+        let aa = DoubleBits::from(a.mantissa) << exp;
+        let bb = DoubleBits::from(b.mantissa);
+        // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
+        let q = (aa / bb) as Bits;
+        q as FP
+    } else {
+        // exp >= Bits::BITS
+        let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
+        let bb = DoubleBits::from(b.mantissa);
+
+        // e > 0
+        // The result is `floor((aa << e) / bb)`
+        let e = exp - (Bits::BITS - 1) as Exponent;
+
+        // aa = q * bb + r
+        // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
+        // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`.
+        let q = (aa / bb) as Bits;
+        // 0 <= r < b
+        let r = (aa % bb) as Bits;
+
+        // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb)
+        // 0 <= (r << e) / bb < 2^e
+        //
+        // There are two cases:
+        // 1. floor((r << e) / bb) = 0
+        // 2. 0 < floor((r << e) / bb) < 2^e
+        //
+        // Case 1:
+        // The result is q << e.
+        //
+        // Case 2:
+        // The result is (q << e) + non-zero low e bits.
+        //
+        // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends
+        // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4,
+        // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits
+        // of q, it doesn't matter *which* bits are non-zero. As long as:
+        //
+        // SIGNIFICAND_BITS <= Bits::BITS - 4
+        //
+        // we can just set bit 0 of q to 1 instead of using extra low-order bits.
+        //
+        // Therefore the result rounds the same way as (q | 1) << e.
+        //
+        // Case 1 happens when:
+        // (r << e) / bb < 1
+        // (r << e) <= bb - 1
+        // r <= ((bb - 1) >> e)
+        let case_1_bound = safe_shr(b.mantissa - 1, e);
+        let q_adj = if r <= case_1_bound {
+            // Case 1.
+            q
+        } else {
+            // Case 2.
+            const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS);
+            q | 1
+        };
+        q_adj as FP * pow2(e)
+    }
+}
+
+/// Division rounded up for positive finite numbers.
+fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        1.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 < HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS
+        // SIGNIFICAND_BITS > HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS);
+        let q = ((aa - 1) / b.mantissa) as HalfBits + 1;
+        q.into()
+    } else if exp <= (Bits::BITS - 1) as Exponent {
+        let aa = DoubleBits::from(a.mantissa) << exp;
+        let bb = DoubleBits::from(b.mantissa);
+        // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1
+        //   <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX
+        let q = ((aa - 1) / bb) as Bits + 1;
+        q as FP
+    } else {
+        let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
+        let bb = DoubleBits::from(b.mantissa);
+        // e > 0
+        // The result is ceil((aa << e) / b).
+        let e = exp - (Bits::BITS - 1) as Exponent;
+
+        // aa = q * bb + r
+        // `q < Bits::MAX` as in the previous case.
+        // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`.
+        let q = (aa / bb) as Bits;
+        // 0 <= r < b
+        let r = (aa % bb) as Bits;
+
+        // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb)
+        // 0 <= (r << e) / bb < 2^e
+        //
+        // There are three cases:
+        // 1. ceil((r << e) / bb) = 0
+        // 2. 0 < ceil((r << e) / bb) < 2^e
+        // 3. ceil((r << e) / bb) = 2^e
+        //
+        // Case 1:
+        // The result is q << e.
+        //
+        // Case 2:
+        // The result is (q << e) + non-zero low e bits.
+        //
+        // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends
+        // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4,
+        // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits
+        // of q, it doesn't matter *which* bits are non-zero. As long as:
+        //
+        // SIGNIFICAND_BITS <= Bits::BITS - 4
+        //
+        // we can just set bit 0 of q to 1 instead of using extra low-order bits.
+        //
+        // Therefore the result rounds the same way as (q | 1) << e.
+        //
+        // In case 3:
+        // The result is (q + 1) << e.
+        //
+        // Case 1 happens when r = 0.
+        // Case 3 happens when:
+        // (r << e) / bb > (1 << e) - 1
+        // (r << e) > (bb << e) - b
+        // ((bb - r) << e) <= bb - 1
+        // bb - r <= (bb - 1) >> e
+        // r >= bb - ((bb - 1) >> e)
+        let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e);
+        let q_adj = if r == 0 {
+            // Case 1.
+            q
+        } else if r < case_3_bound {
+            // Case 2.
+            const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS);
+            q | 1
+        } else {
+            // Case 3.
+            // No overflow because `q < Bits::MAX`.
+            q + 1
+        };
+        q_adj as FP * pow2(e)
+    }
+}
diff --git a/library/std/src/f32/soft/tests.rs b/library/std/src/f32/soft/tests.rs
new file mode 100644
index 0000000000000..e7b7affab9ab5
--- /dev/null
+++ b/library/std/src/f32/soft/tests.rs
@@ -0,0 +1,49 @@
+use crate::f32::soft::{Positive, PositiveFinite, Representation, Sign, pow2};
+
+#[test]
+fn test_representation() {
+    assert_eq!(Representation::new(-1.5f32), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Finite(PositiveFinite { exp: -23, mantissa: 0xc00000 }),
+    });
+
+    assert_eq!(Representation::new(f32::MIN_POSITIVE), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -149, mantissa: 0x800000 }),
+    });
+
+    assert_eq!(Representation::new(f32::MIN_POSITIVE / 2.0), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -150, mantissa: 0x800000 }),
+    });
+
+    assert_eq!(Representation::new(f32::MAX), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: 104, mantissa: 0xffffff }),
+    });
+
+    assert_eq!(Representation::new(-0.0), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Zero,
+    });
+
+    assert_eq!(Representation::new(f32::INFINITY), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Infinity,
+    });
+
+    assert_eq!(Representation::new(f32::NAN), Representation {
+        sign: Sign::Positive,
+        abs: Positive::NaN,
+    });
+}
+
+#[test]
+fn test_pow2() {
+    assert_eq!(pow2(2), 4.0);
+    assert_eq!(pow2(-1), 0.5);
+    assert_eq!(pow2(-150), 0.0);
+    assert_eq!(pow2(-126), f32::MIN_POSITIVE);
+    assert_eq!(pow2(-127), f32::MIN_POSITIVE / 2.0);
+    assert_eq!(pow2(128), f32::INFINITY);
+}
diff --git a/library/std/src/f32/tests.rs b/library/std/src/f32/tests.rs
index 895719432000e..1bdc80a4f37b8 100644
--- a/library/std/src/f32/tests.rs
+++ b/library/std/src/f32/tests.rs
@@ -441,6 +441,16 @@ fn test_div_euclid() {
     assert_eq!(5.0f32.div_euclid(0.0), inf);
     assert_eq!((-5.0f32).div_euclid(0.0), -inf);
 
+    // Small / infinity.
+    assert_eq!(Ordering::Equal, 5.0f32.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, 0.0f32.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(inf).total_cmp(&-0.0));
+    assert_eq!((-5.0f32).div_euclid(inf), -1.0);
+    assert_eq!(Ordering::Equal, 5.0f32.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, 0.0f32.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(-inf).total_cmp(&0.0));
+    assert_eq!((-5.0f32).div_euclid(-inf), 1.0);
+
     // 0 / x
     assert_eq!(Ordering::Equal, 0.0f32.div_euclid(10.0).total_cmp(&0.0));
     assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(10.0).total_cmp(&-0.0));
@@ -448,6 +458,7 @@ fn test_div_euclid() {
     // small / large
     assert_eq!(Ordering::Equal, 1.0f32.div_euclid(10.0).total_cmp(&0.0));
     assert_eq!((-1.0f32).div_euclid(10.0), -1.0);
+    println!("{}", 1.0f32.div_euclid(-10.0));
     assert_eq!(Ordering::Equal, 1.0f32.div_euclid(-10.0).total_cmp(&-0.0));
     assert_eq!((-1.0f32).div_euclid(-10.0), 1.0);
 
diff --git a/library/std/src/f64.rs b/library/std/src/f64.rs
index fb17857ec6d94..751c77faff1a5 100644
--- a/library/std/src/f64.rs
+++ b/library/std/src/f64.rs
@@ -12,7 +12,7 @@
 #![stable(feature = "rust1", since = "1.0.0")]
 #![allow(missing_docs)]
 
-mod div_euclid;
+mod soft;
 
 #[cfg(test)]
 mod tests;
@@ -250,7 +250,7 @@ impl f64 {
     #[inline]
     #[stable(feature = "euclidean_division", since = "1.38.0")]
     pub fn div_euclid(self, rhs: f64) -> f64 {
-        div_euclid::div_euclid(self, rhs)
+        soft::div_euclid(self, rhs)
     }
 
     /// Calculates the least nonnegative remainder of `self (mod rhs)`.
diff --git a/library/std/src/f64/div_euclid.rs b/library/std/src/f64/div_euclid.rs
deleted file mode 100644
index f4346dd43afff..0000000000000
--- a/library/std/src/f64/div_euclid.rs
+++ /dev/null
@@ -1,204 +0,0 @@
-#[cfg(test)]
-mod tests;
-
-/// Software implementation of `f64::div_euclid`.
-#[allow(dead_code)]
-pub(crate) fn div_euclid(a: f64, b: f64) -> f64 {
-    if let Some((a_neg, a_exp, a_m)) = normal_form(a)
-        && let Some((b_neg, b_exp, b_m)) = normal_form(b)
-    {
-        let exp = a_exp - b_exp;
-        match (a_neg, b_neg) {
-            (false, false) => div_floor(exp, a_m, b_m),
-            (true, false) => -div_ceil(exp, a_m, b_m),
-            (false, true) => -div_floor(exp, a_m, b_m),
-            (true, true) => div_ceil(exp, a_m, b_m),
-        }
-    } else {
-        // `a` or `b` are +-0.0 or infinity or NaN.
-        // `a / b` is also +-0.0 or infinity or NaN.
-        // There is no need to round to an integer.
-        a / b
-    }
-}
-
-/// Returns `floor((a << exp) / b)`.
-///
-/// Requires `2^52 <= a, b < 2^53`.
-fn div_floor(exp: i32, a: u64, b: u64) -> f64 {
-    if exp < 0 {
-        0.0
-    } else if exp <= 11 {
-        // aa < (2^53 << 11) = 2^64
-        let aa = a << exp;
-        // q < 2^64 / 2^52 = 2^12
-        let q = (aa / b) as u32;
-        q.into()
-    } else if exp <= 63 {
-        // aa < (2^53 << 63) = 2^116
-        let aa = u128::from(a) << exp;
-        let bb = u128::from(b);
-        // q < 2^116 / 2^52 = 2^64
-        let q = (aa / bb) as u64;
-        q as f64
-    } else {
-        // aa >= (2^52 << 63) = 2^115
-        // aa < (2^53 << 63) = 2^116
-        let aa = u128::from(a) << 63;
-        let bb = u128::from(b);
-        // e > 0
-        // The result is floor((aa << e) / b).
-        let e = (exp - 63) as u32;
-
-        // aa = q * b + r
-        // q >= 2^115 / 2^53 = 2^62
-        // q < 2^116 / 2^52 = 2^64
-        let q = (aa / bb) as u64;
-        // 0 <= r < b
-        let r = (aa % bb) as u64;
-
-        // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
-        // 0 <= (r << e) / b < 2^e
-        //
-        // There are two cases:
-        // 1. floor((r << e) / b) = 0
-        // 2. 0 < floor((r << e) / b) < 2^e
-        //
-        // In case 1:
-        // The result is q << e.
-        //
-        // In case 2:
-        // The result is (q << e) + non-zero low e bits.
-        // This rounds the same way as (q | 1) << e because rounding beyond
-        // the 25 most significant bits of q depends only on whether the low-order
-        // bits are non-zero.
-        //
-        // Case 1 happens when:
-        // (r << e) / b < 1
-        // (r << e) <= b - 1
-        // r <= ((b - 1) >> e)
-        let case_1_bound = if e < 64 { (b - 1) >> e } else { 0 };
-        let q_adj = if r <= case_1_bound {
-            // Case 1.
-            q
-        } else {
-            // Case 2.
-            q | 1
-        };
-        q_adj as f64 * pow2(e)
-    }
-}
-
-/// Returns `ceil((a << exp) / b)`.
-///
-/// Requires `2^52 <= a, b < 2^53`.
-fn div_ceil(exp: i32, a: u64, b: u64) -> f64 {
-    if exp < 0 {
-        1.0
-    } else if exp <= 11 {
-        // aa < (2^53 << 11) = 2^64
-        let aa = a << exp;
-        // q < 2^64 / 2^52 + 1 = 2^12 + 1
-        let q = ((aa - 1) / b) as u32 + 1;
-        q.into()
-    } else if exp <= 63 {
-        // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63
-        let aa = u128::from(a) << exp;
-        let bb = u128::from(b);
-        // q <= (2^116 - 2^63) / 2^52 + 1 = 2^64 - 2^11 + 1
-        let q = ((aa - 1) / bb) as u64 + 1;
-        q as f64
-    } else {
-        // aa >= (2^52 << 63) = 2^115
-        // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63
-        let aa = u128::from(a) << 63;
-        let bb = u128::from(b);
-        // e > 0
-        // The result is ceil((aa << e) / b).
-        let e = (exp - 63) as u32;
-
-        // aa = q * b + r
-        // q >= 2^115 / 2^53 = 2^62
-        // q <= (2^116 - 2^63) / 2^52 = 2^64 - 2^11
-        let q = (aa / bb) as u64;
-        // 0 <= r < b
-        let r = (aa % bb) as u64;
-
-        // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
-        // 0 <= (r << e) / b < 2^e
-        //
-        // There are three cases:
-        // 1. ceil((r << e) / b) = 0
-        // 2. 0 < ceil((r << e) / b) < 2^e
-        // 3. ceil((r << e) / b) = 2^e
-        //
-        // In case 1:
-        // The result is q << e.
-        //
-        // In case 2:
-        // The result is (q << e) + non-zero low e bits.
-        // This rounds the same way as (q | 1) << e because rounding beyond
-        // the 54 most significant bits of q depends only on whether the low-order
-        // bits are non-zero.
-        //
-        // In case 3:
-        // The result is (q + 1) << e.
-        //
-        // Case 1 happens when r = 0.
-        // Case 3 happens when:
-        // (r << e) / b > (1 << e) - 1
-        // (r << e) > (b << e) - b
-        // ((b - r) << e) <= b - 1
-        // b - r <= (b - 1) >> e
-        // r >= b - ((b - 1) >> e)
-        let case_3_bound = b - if e < 64 { (b - 1) >> e } else { 0 };
-        let q_adj = if r == 0 {
-            // Case 1.
-            q
-        } else if r < case_3_bound {
-            // Case 2.
-            q | 1
-        } else {
-            // Case 3.
-            q + 1
-        };
-        q_adj as f64 * pow2(e)
-    }
-}
-
-/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`.
-///
-/// `x = (-1)^sign * 2^exp * mantissa`
-///
-/// `2^52 <= mantissa < 2^53`
-fn normal_form(x: f64) -> Option<(bool, i32, u64)> {
-    let bits = x.to_bits();
-    let sign = bits >> 63 != 0;
-    let biased_exponent = (bits >> 52 & 0x7ff) as i32;
-    let significand = bits & ((1 << 52) - 1);
-    match biased_exponent {
-        0 if significand == 0 => {
-            // 0.0
-            None
-        }
-        0 => {
-            // Subnormal number: 2^(-1022-52) * significand.
-            // We want mantissa to have exactly 11 leading zeros.
-            let shift = significand.leading_zeros() - 11;
-            Some((sign, -1022 - 52 - shift as i32, significand << shift))
-        }
-        0x7ff => {
-            // Infinity or NaN.
-            None
-        }
-        _ => {
-            // Normal number: 2^(biased_exponent-1023-52) * (2^52 + significand)
-            Some((sign, biased_exponent - 1023 - 52, 1 << 52 | significand))
-        }
-    }
-}
-
-/// Returns `2^exp`.
-fn pow2(exp: u32) -> f64 {
-    if exp <= 1023 { f64::from_bits(u64::from(exp + 1023) << 52) } else { f64::INFINITY }
-}
diff --git a/library/std/src/f64/div_euclid/tests.rs b/library/std/src/f64/div_euclid/tests.rs
deleted file mode 100644
index eeb43a6b7d868..0000000000000
--- a/library/std/src/f64/div_euclid/tests.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-#[test]
-fn test_normal_form() {
-    use crate::f64::div_euclid::normal_form;
-
-    assert_eq!(normal_form(-1.5f64), Some((true, -52, 3 << 51)));
-    assert_eq!(normal_form(f64::MIN_POSITIVE), Some((false, -1074, 1 << 52)));
-    assert_eq!(normal_form(f64::MIN_POSITIVE / 2.0), Some((false, -1075, 1 << 52)));
-    assert_eq!(normal_form(f64::MAX), Some((false, 971, (1 << 53) - 1)));
-    assert_eq!(normal_form(0.0), None);
-    assert_eq!(normal_form(f64::INFINITY), None);
-    assert_eq!(normal_form(f64::NAN), None);
-}
-
-#[test]
-fn test_pow2() {
-    use crate::f64::div_euclid::pow2;
-
-    assert_eq!(pow2(0), 1.0f64);
-    assert_eq!(pow2(4), 16.0f64);
-    assert_eq!(pow2(1024), f64::INFINITY);
-}
diff --git a/library/std/src/f64/soft.rs b/library/std/src/f64/soft.rs
new file mode 100644
index 0000000000000..1a68dfdc85c2f
--- /dev/null
+++ b/library/std/src/f64/soft.rs
@@ -0,0 +1,344 @@
+//! Software floating point functions for `f64`.
+
+#[cfg(test)]
+mod tests;
+
+/// The floating point type.
+type FP = f64;
+
+/// Type of the exponent.
+type Exponent = i32;
+
+/// Type of `FP::to_bits`.
+type Bits = u64;
+
+/// Half as many bits as `Bits`.
+type HalfBits = u32;
+
+/// Twice as many bits as `Bits`.
+type DoubleBits = u128;
+
+/// Number of bits in the significand.
+const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1;
+
+/// Number of bits in the exponent.
+const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS;
+
+/// Encoded exponent = EXPONENT_BIAS + actual exponent.
+const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP;
+
+/// Represents an `FP` number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct Representation {
+    /// Sign.
+    sign: Sign,
+    /// Absolute value.
+    abs: Positive,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Sign {
+    Positive,
+    Negative,
+}
+
+/// Represents a positive number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+enum Positive {
+    Zero,
+    Finite(PositiveFinite),
+    Infinity,
+    NaN,
+}
+
+/// Represents a non-zero, positive finite number.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+struct PositiveFinite {
+    /// `number = 2^exp * mantissa`.
+    exp: Exponent,
+    /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)`
+    mantissa: Bits,
+}
+
+impl Representation {
+    fn new(x: FP) -> Self {
+        let bits = x.to_bits();
+
+        let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 {
+            Sign::Positive
+        } else {
+            Sign::Negative
+        };
+
+        const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1;
+        const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1;
+
+        let significand = bits & SIGNIFICAND_MASK;
+        let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK;
+
+        let abs = match biased_exponent {
+            0 if significand == 0 => Positive::Zero,
+            0 => {
+                // Subnormal number.
+                // Normalize it by shifting left, so that it has exactly
+                // SIGNIFICAND_BITS + 1 bits.
+                let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros());
+
+                Positive::Finite(PositiveFinite {
+                    exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent,
+                    mantissa: significand << shift,
+                })
+            }
+            EXPONENT_MASK if significand == 0 => Positive::Infinity,
+            EXPONENT_MASK => Positive::NaN,
+            _ => Positive::Finite(PositiveFinite {
+                exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent,
+                mantissa: 1 << SIGNIFICAND_BITS | significand,
+            }),
+        };
+        Representation { sign, abs }
+    }
+}
+
+/// Shift right that works even if `shift >= Bits::BITS`.
+///
+/// Requires `shift >= 0`.
+fn safe_shr(a: Bits, shift: Exponent) -> Bits {
+    if shift < Bits::BITS as Exponent { a >> shift } else { 0 }
+}
+
+/// Returns `2^exp`.
+fn pow2(exp: Exponent) -> FP {
+    let biased_exponent = exp + EXPONENT_BIAS;
+    if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) {
+        // Round to 0.
+        0.0
+    } else if biased_exponent <= 0 {
+        // Subnormal.
+        FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1))
+    } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 {
+        // Normal.
+        FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS)
+    } else {
+        // Round to infinity.
+        FP::INFINITY
+    }
+}
+
+/// Euclidean division.
+#[allow(dead_code)]
+pub(crate) fn div_euclid(a: FP, b: FP) -> FP {
+    let a = Representation::new(a);
+    let b = Representation::new(b);
+    let res = match a.sign {
+        Sign::Positive => div_floor_pos(a.abs, b.abs),
+        Sign::Negative => -div_ceil_pos(a.abs, b.abs),
+    };
+    match b.sign {
+        Sign::Positive => res,
+        Sign::Negative => -res,
+    }
+}
+
+/// Division rounded down for positive numbers.
+fn div_floor_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b),
+        (Positive::Finite(_), Positive::Infinity) => 0.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded up for positive numbers.
+fn div_ceil_pos(a: Positive, b: Positive) -> FP {
+    match (a, b) {
+        (Positive::NaN, _) => FP::NAN,
+        (_, Positive::NaN) => FP::NAN,
+        (Positive::Zero, Positive::Zero) => FP::NAN,
+        (Positive::Zero, Positive::Finite(_)) => 0.0,
+        (Positive::Zero, Positive::Infinity) => 0.0,
+        (Positive::Finite(_), Positive::Zero) => FP::INFINITY,
+        (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b),
+        // Tricky case. It's 1.0 rather than 0.0. This way
+        // `div_euclid(-finite, inf) = -1.0` is consistent with
+        // `rem_euclid(-finite, inf) = inf`.
+        (Positive::Finite(_), Positive::Infinity) => 1.0,
+        (Positive::Infinity, Positive::Zero) => FP::INFINITY,
+        (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY,
+        (Positive::Infinity, Positive::Infinity) => FP::NAN,
+    }
+}
+
+/// Division rounded down for positive finite numbers.
+fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        0.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 <= HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS
+        // SIGNIFICAND_BITS >= HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS);
+        let q = (aa / b.mantissa) as HalfBits;
+        q.into()
+    } else if exp <= (Bits::BITS - 1) as Exponent {
+        let aa = DoubleBits::from(a.mantissa) << exp;
+        let bb = DoubleBits::from(b.mantissa);
+        // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
+        let q = (aa / bb) as Bits;
+        q as FP
+    } else {
+        // exp >= Bits::BITS
+        let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
+        let bb = DoubleBits::from(b.mantissa);
+
+        // e > 0
+        // The result is `floor((aa << e) / bb)`
+        let e = exp - (Bits::BITS - 1) as Exponent;
+
+        // aa = q * bb + r
+        // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
+        // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`.
+        let q = (aa / bb) as Bits;
+        // 0 <= r < b
+        let r = (aa % bb) as Bits;
+
+        // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb)
+        // 0 <= (r << e) / bb < 2^e
+        //
+        // There are two cases:
+        // 1. floor((r << e) / bb) = 0
+        // 2. 0 < floor((r << e) / bb) < 2^e
+        //
+        // Case 1:
+        // The result is q << e.
+        //
+        // Case 2:
+        // The result is (q << e) + non-zero low e bits.
+        //
+        // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends
+        // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4,
+        // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits
+        // of q, it doesn't matter *which* bits are non-zero. As long as:
+        //
+        // SIGNIFICAND_BITS <= Bits::BITS - 4
+        //
+        // we can just set bit 0 of q to 1 instead of using extra low-order bits.
+        //
+        // Therefore the result rounds the same way as (q | 1) << e.
+        //
+        // Case 1 happens when:
+        // (r << e) / bb < 1
+        // (r << e) <= bb - 1
+        // r <= ((bb - 1) >> e)
+        let case_1_bound = safe_shr(b.mantissa - 1, e);
+        let q_adj = if r <= case_1_bound {
+            // Case 1.
+            q
+        } else {
+            // Case 2.
+            const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS);
+            q | 1
+        };
+        q_adj as FP * pow2(e)
+    }
+}
+
+/// Division rounded up for positive finite numbers.
+fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
+    let exp = a.exp - b.exp;
+    if exp < 0 {
+        1.0
+    } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent {
+        // `aa` fits in `Bits`
+        let aa = a.mantissa << exp;
+        // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as:
+        // exp + 1 < HalfBits::BITS
+        // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS
+        // SIGNIFICAND_BITS > HalfBits::BITS
+        const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS);
+        let q = ((aa - 1) / b.mantissa) as HalfBits + 1;
+        q.into()
+    } else if exp <= (Bits::BITS - 1) as Exponent {
+        let aa = DoubleBits::from(a.mantissa) << exp;
+        let bb = DoubleBits::from(b.mantissa);
+        // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1
+        //   <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX
+        let q = ((aa - 1) / bb) as Bits + 1;
+        q as FP
+    } else {
+        let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
+        let bb = DoubleBits::from(b.mantissa);
+        // e > 0
+        // The result is ceil((aa << e) / b).
+        let e = exp - (Bits::BITS - 1) as Exponent;
+
+        // aa = q * bb + r
+        // `q < Bits::MAX` as in the previous case.
+        // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`.
+        let q = (aa / bb) as Bits;
+        // 0 <= r < b
+        let r = (aa % bb) as Bits;
+
+        // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb)
+        // 0 <= (r << e) / bb < 2^e
+        //
+        // There are three cases:
+        // 1. ceil((r << e) / bb) = 0
+        // 2. 0 < ceil((r << e) / bb) < 2^e
+        // 3. ceil((r << e) / bb) = 2^e
+        //
+        // Case 1:
+        // The result is q << e.
+        //
+        // Case 2:
+        // The result is (q << e) + non-zero low e bits.
+        //
+        // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends
+        // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4,
+        // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits
+        // of q, it doesn't matter *which* bits are non-zero. As long as:
+        //
+        // SIGNIFICAND_BITS <= Bits::BITS - 4
+        //
+        // we can just set bit 0 of q to 1 instead of using extra low-order bits.
+        //
+        // Therefore the result rounds the same way as (q | 1) << e.
+        //
+        // In case 3:
+        // The result is (q + 1) << e.
+        //
+        // Case 1 happens when r = 0.
+        // Case 3 happens when:
+        // (r << e) / bb > (1 << e) - 1
+        // (r << e) > (bb << e) - b
+        // ((bb - r) << e) <= bb - 1
+        // bb - r <= (bb - 1) >> e
+        // r >= bb - ((bb - 1) >> e)
+        let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e);
+        let q_adj = if r == 0 {
+            // Case 1.
+            q
+        } else if r < case_3_bound {
+            // Case 2.
+            const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS);
+            q | 1
+        } else {
+            // Case 3.
+            // No overflow because `q < Bits::MAX`.
+            q + 1
+        };
+        q_adj as FP * pow2(e)
+    }
+}
diff --git a/library/std/src/f64/soft/tests.rs b/library/std/src/f64/soft/tests.rs
new file mode 100644
index 0000000000000..c74b2bbebaadb
--- /dev/null
+++ b/library/std/src/f64/soft/tests.rs
@@ -0,0 +1,49 @@
+use crate::f64::soft::{Positive, PositiveFinite, Representation, Sign, pow2};
+
+#[test]
+fn test_representation() {
+    assert_eq!(Representation::new(-1.5f64), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Finite(PositiveFinite { exp: -52, mantissa: 3 << 51 }),
+    });
+
+    assert_eq!(Representation::new(f64::MIN_POSITIVE), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -1074, mantissa: 1 << 52 }),
+    });
+
+    assert_eq!(Representation::new(f64::MIN_POSITIVE / 2.0), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: -1075, mantissa: 1 << 52 }),
+    });
+
+    assert_eq!(Representation::new(f64::MAX), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Finite(PositiveFinite { exp: 971, mantissa: (1 << 53) - 1 }),
+    });
+
+    assert_eq!(Representation::new(-0.0f64), Representation {
+        sign: Sign::Negative,
+        abs: Positive::Zero,
+    });
+
+    assert_eq!(Representation::new(f64::INFINITY), Representation {
+        sign: Sign::Positive,
+        abs: Positive::Infinity,
+    });
+
+    assert_eq!(Representation::new(f64::NAN), Representation {
+        sign: Sign::Positive,
+        abs: Positive::NaN,
+    });
+}
+
+#[test]
+fn test_pow2() {
+    assert_eq!(pow2(2), 4.0);
+    assert_eq!(pow2(-1), 0.5);
+    assert_eq!(pow2(-1075), 0.0);
+    assert_eq!(pow2(-1022), f64::MIN_POSITIVE);
+    assert_eq!(pow2(-1023), f64::MIN_POSITIVE / 2.0);
+    assert_eq!(pow2(1024), f64::INFINITY);
+}
diff --git a/library/std/src/f64/tests.rs b/library/std/src/f64/tests.rs
index 0499cc4516b4e..b6147326818ea 100644
--- a/library/std/src/f64/tests.rs
+++ b/library/std/src/f64/tests.rs
@@ -429,6 +429,16 @@ fn test_div_euclid() {
     assert_eq!(5.0f64.div_euclid(0.0), inf);
     assert_eq!((-5.0f64).div_euclid(0.0), -inf);
 
+    // Small / infinity.
+    assert_eq!(Ordering::Equal, 5.0f64.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, 0.0f64.div_euclid(inf).total_cmp(&0.0));
+    assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(inf).total_cmp(&-0.0));
+    assert_eq!((-5.0f64).div_euclid(inf), -1.0);
+    assert_eq!(Ordering::Equal, 5.0f64.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, 0.0f64.div_euclid(-inf).total_cmp(&-0.0));
+    assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(-inf).total_cmp(&0.0));
+    assert_eq!((-5.0f64).div_euclid(-inf), 1.0);
+
     // 0 / x
     assert_eq!(Ordering::Equal, 0.0f64.div_euclid(10.0).total_cmp(&0.0));
     assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(10.0).total_cmp(&-0.0));
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index dbbfed619559d..df5d6f6cd3930 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -677,6 +677,7 @@ pub mod alloc;
 
 // Private support modules
 mod panicking;
+mod u256;
 
 #[path = "../../backtrace/src/lib.rs"]
 #[allow(dead_code, unused_attributes, fuzzy_provenance_casts, unsafe_op_in_unsafe_fn)]
diff --git a/library/std/src/u256.rs b/library/std/src/u256.rs
new file mode 100644
index 0000000000000..7094753257dc6
--- /dev/null
+++ b/library/std/src/u256.rs
@@ -0,0 +1,117 @@
+//! Limited unsigned 256-bit arithmetic for internal use.
+
+#[cfg(test)]
+mod tests;
+
+use core::ops::{BitOr, BitOrAssign, Div, DivAssign, Shl, ShlAssign, Sub, SubAssign};
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub(crate) struct U256 {
+    high: u128,
+    low: u128,
+}
+
+impl U256 {
+    pub(crate) const ZERO: Self = Self { high: 0, low: 0 };
+
+    pub(crate) const ONE: Self = Self { high: 0, low: 1 };
+
+    /// The number of leading 0 bits.
+    pub(crate) fn leading_zeros(self) -> u32 {
+        if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() }
+    }
+
+    /// Returns (quotient, remainder).
+    pub(crate) fn div_rem(mut self, rhs: Self) -> (Self, Self) {
+        assert!(rhs != Self::ZERO);
+        let shift_limit = (rhs.leading_zeros() + 1).saturating_sub(self.leading_zeros());
+        let mut q = Self::ZERO;
+        for shift in (0..shift_limit).rev() {
+            let rhs_shifted = rhs << shift;
+            if self >= rhs_shifted {
+                q |= Self::ONE << shift;
+                self -= rhs_shifted;
+            }
+        }
+        (q, self)
+    }
+
+    /// Wrap to `u128`.
+    pub(crate) fn wrap_u128(self) -> u128 {
+        self.low
+    }
+}
+
+impl From<u128> for U256 {
+    fn from(value: u128) -> Self {
+        Self { high: 0, low: value }
+    }
+}
+
+impl Shl<u32> for U256 {
+    type Output = Self;
+
+    fn shl(self, rhs: u32) -> Self {
+        if rhs == 0 {
+            self
+        } else if rhs < 128 {
+            Self { high: self.high << rhs | self.low >> (128 - rhs), low: self.low << rhs }
+        } else if rhs == 128 {
+            Self { high: self.low, low: 0 }
+        } else if rhs < 256 {
+            Self { high: self.low << (rhs - 128), low: 0 }
+        } else {
+            Self::ZERO
+        }
+    }
+}
+
+impl ShlAssign<u32> for U256 {
+    fn shl_assign(&mut self, rhs: u32) {
+        *self = *self << rhs;
+    }
+}
+
+impl Sub for U256 {
+    type Output = Self;
+
+    fn sub(self, rhs: U256) -> Self {
+        let (low, borrow) = self.low.overflowing_sub(rhs.low);
+        let (high, _) = self.high.borrowing_sub(rhs.high, borrow);
+        Self { low, high }
+    }
+}
+
+impl SubAssign for U256 {
+    fn sub_assign(&mut self, rhs: U256) {
+        *self = *self - rhs;
+    }
+}
+
+impl Div<U256> for U256 {
+    type Output = Self;
+
+    fn div(self, rhs: U256) -> Self {
+        self.div_rem(rhs).0
+    }
+}
+
+impl DivAssign for U256 {
+    fn div_assign(&mut self, rhs: U256) {
+        *self = *self / rhs;
+    }
+}
+
+impl BitOr<U256> for U256 {
+    type Output = Self;
+
+    fn bitor(self, rhs: U256) -> Self {
+        Self { high: self.high | rhs.high, low: self.low | rhs.low }
+    }
+}
+
+impl BitOrAssign<U256> for U256 {
+    fn bitor_assign(&mut self, rhs: U256) {
+        *self = *self | rhs;
+    }
+}
diff --git a/library/std/src/u256/tests.rs b/library/std/src/u256/tests.rs
new file mode 100644
index 0000000000000..0466b519d6f5d
--- /dev/null
+++ b/library/std/src/u256/tests.rs
@@ -0,0 +1,66 @@
+use crate::u256::U256;
+
+#[test]
+fn test_leading_zeros() {
+    assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256);
+    assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116);
+    assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252);
+}
+
+#[test]
+fn test_wrap_u128() {
+    assert_eq!(U256 { high: 3, low: 4 }.wrap_u128(), 4);
+}
+
+#[test]
+fn test_from_u128() {
+    assert_eq!(U256::from(7u128), U256 { high: 0, low: 7 });
+}
+
+#[test]
+fn test_shl() {
+    let a = U256 { high: 0x1234, low: 0x0123456789abcdef0123456789abcdef };
+
+    assert_eq!(a << 0, a);
+    assert_eq!(a << 16, U256 { high: 0x12340123, low: 0x456789abcdef0123456789abcdef0000 });
+    assert_eq!(a << 128, U256 { high: 0x0123456789abcdef0123456789abcdef, low: 0 });
+    assert_eq!(a << 140, U256 { high: 0x3456789abcdef0123456789abcdef000, low: 0 });
+    assert_eq!(a << 256, U256::ZERO);
+    assert_eq!(a << 1000, U256::ZERO);
+}
+
+#[test]
+fn test_sub() {
+    let a = U256 { high: 7, low: 10 };
+    let b = U256 { high: 3, low: 2 };
+    let c = U256 { high: 3, low: 15 };
+    assert_eq!(a - b, U256 { high: 4, low: 8 });
+    assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 });
+}
+
+#[test]
+fn test_div_rem() {
+    let a = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef };
+    let b = U256 { high: 0, low: 0x987 };
+    let c = U256 { high: 0x13, low: 0x9876543210abc };
+    assert_eq!(
+        a.div_rem(b),
+        (U256 { high: 0x1208, low: 0x63d18447855b62f52e9a6983a9e22813 }, U256 {
+            high: 0,
+            low: 0xea
+        })
+    );
+    assert_eq!(
+        a.div_rem(c),
+        (U256 { high: 0, low: 0x90ad6 }, U256 {
+            high: 0xd,
+            low: 0x0123456789abcd98d752c5f8c1057cc7
+        },)
+    );
+    assert_eq!(c.div_rem(a), (U256::ZERO, c));
+}
+
+#[test]
+fn test_div() {
+    assert_eq!(U256::from(47u128) / U256::from(10u128), U256::from(4u128));
+}

From 009cf4b8f8f855318bcd7dde209110c166eb8145 Mon Sep 17 00:00:00 2001
From: Tomek Czajka <tczajka@gmail.com>
Date: Thu, 12 Dec 2024 11:15:14 +0100
Subject: [PATCH 4/5] div_euclid benchmarks

---
 library/std/benches/f128/mod.rs | 18 ++++++++++++++++++
 library/std/benches/f16/mod.rs  | 13 +++++++++++++
 library/std/benches/f32/mod.rs  | 18 ++++++++++++++++++
 library/std/benches/f64/mod.rs  | 18 ++++++++++++++++++
 library/std/benches/lib.rs      |  6 ++++++
 5 files changed, 73 insertions(+)
 create mode 100644 library/std/benches/f128/mod.rs
 create mode 100644 library/std/benches/f16/mod.rs
 create mode 100644 library/std/benches/f32/mod.rs
 create mode 100644 library/std/benches/f64/mod.rs

diff --git a/library/std/benches/f128/mod.rs b/library/std/benches/f128/mod.rs
new file mode 100644
index 0000000000000..08ca8b3dd005c
--- /dev/null
+++ b/library/std/benches/f128/mod.rs
@@ -0,0 +1,18 @@
+use core::f128::consts::PI;
+
+use test::{Bencher, black_box};
+
+#[bench]
+fn div_euclid_small(b: &mut Bencher) {
+    b.iter(|| black_box(10000.12345f128).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_medium(b: &mut Bencher) {
+    b.iter(|| black_box(1.123e30f128).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_large(b: &mut Bencher) {
+    b.iter(|| black_box(1.123e4000f128).div_euclid(black_box(PI)));
+}
diff --git a/library/std/benches/f16/mod.rs b/library/std/benches/f16/mod.rs
new file mode 100644
index 0000000000000..e0d33df31ef78
--- /dev/null
+++ b/library/std/benches/f16/mod.rs
@@ -0,0 +1,13 @@
+use core::f16::consts::PI;
+
+use test::{Bencher, black_box};
+
+#[bench]
+fn div_euclid_small(b: &mut Bencher) {
+    b.iter(|| black_box(20.12f16).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_large(b: &mut Bencher) {
+    b.iter(|| black_box(50000.0f16).div_euclid(black_box(PI)));
+}
diff --git a/library/std/benches/f32/mod.rs b/library/std/benches/f32/mod.rs
new file mode 100644
index 0000000000000..7b45e28e09c48
--- /dev/null
+++ b/library/std/benches/f32/mod.rs
@@ -0,0 +1,18 @@
+use core::f32::consts::PI;
+
+use test::{Bencher, black_box};
+
+#[bench]
+fn div_euclid_small(b: &mut Bencher) {
+    b.iter(|| black_box(130.12345f32).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_medium(b: &mut Bencher) {
+    b.iter(|| black_box(1.123e7f32).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_large(b: &mut Bencher) {
+    b.iter(|| black_box(1.123e32f32).div_euclid(black_box(PI)));
+}
diff --git a/library/std/benches/f64/mod.rs b/library/std/benches/f64/mod.rs
new file mode 100644
index 0000000000000..fae017b69d93d
--- /dev/null
+++ b/library/std/benches/f64/mod.rs
@@ -0,0 +1,18 @@
+use core::f64::consts::PI;
+
+use test::{Bencher, black_box};
+
+#[bench]
+fn div_euclid_small(b: &mut Bencher) {
+    b.iter(|| black_box(1234.1234578f64).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_medium(b: &mut Bencher) {
+    b.iter(|| black_box(1.123e15f64).div_euclid(black_box(PI)));
+}
+
+#[bench]
+fn div_euclid_large(b: &mut Bencher) {
+    b.iter(|| black_box(1.123e300f64).div_euclid(black_box(PI)));
+}
diff --git a/library/std/benches/lib.rs b/library/std/benches/lib.rs
index 1b21c230a0bf2..7497c65490d97 100644
--- a/library/std/benches/lib.rs
+++ b/library/std/benches/lib.rs
@@ -1,7 +1,13 @@
 // Disabling in Miri as these would take too long.
 #![cfg(not(miri))]
 #![feature(test)]
+#![feature(f16)]
+#![feature(f128)]
 
 extern crate test;
 
+mod f128;
+mod f16;
+mod f32;
+mod f64;
 mod hash;

From d0f8bae821930eb0a4e34b0326f36bb3ff374ef5 Mon Sep 17 00:00:00 2001
From: Tomek Czajka <tczajka@gmail.com>
Date: Thu, 12 Dec 2024 11:36:05 +0100
Subject: [PATCH 5/5] div_euclid: add a fast path for overflow

---
 library/std/src/f128/soft.rs | 12 ++++++++++--
 library/std/src/f32/soft.rs  | 12 ++++++++++--
 library/std/src/f64/soft.rs  | 12 ++++++++++--
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/library/std/src/f128/soft.rs b/library/std/src/f128/soft.rs
index 30b6e82e18086..86befe8dd92d0 100644
--- a/library/std/src/f128/soft.rs
+++ b/library/std/src/f128/soft.rs
@@ -201,7 +201,7 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
         // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
         let q: Bits = (aa / bb).wrap_u128();
         q as FP
-    } else {
+    } else if exp <= FP::MAX_EXP {
         // exp >= Bits::BITS
         let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
         let bb = DoubleBits::from(b.mantissa);
@@ -256,6 +256,10 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
             q | 1
         };
         q_adj as FP * pow2(e)
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
     }
 }
 
@@ -282,7 +286,7 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
         //   <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX
         let q = ((aa - U256::ONE) / bb).wrap_u128() + 1;
         q as FP
-    } else {
+    } else if exp <= FP::MAX_EXP {
         let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
         let bb = DoubleBits::from(b.mantissa);
         // e > 0
@@ -346,5 +350,9 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
             q + 1
         };
         q_adj as FP * pow2(e)
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
     }
 }
diff --git a/library/std/src/f32/soft.rs b/library/std/src/f32/soft.rs
index 9a280893eccf6..f5af4561881ad 100644
--- a/library/std/src/f32/soft.rs
+++ b/library/std/src/f32/soft.rs
@@ -198,7 +198,7 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
         // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
         let q = (aa / bb) as Bits;
         q as FP
-    } else {
+    } else if exp <= FP::MAX_EXP {
         // exp >= Bits::BITS
         let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
         let bb = DoubleBits::from(b.mantissa);
@@ -252,6 +252,10 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
             q | 1
         };
         q_adj as FP * pow2(e)
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
     }
 }
 
@@ -277,7 +281,7 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
         //   <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX
         let q = ((aa - 1) / bb) as Bits + 1;
         q as FP
-    } else {
+    } else if exp <= FP::MAX_EXP {
         let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
         let bb = DoubleBits::from(b.mantissa);
         // e > 0
@@ -340,5 +344,9 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
             q + 1
         };
         q_adj as FP * pow2(e)
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
     }
 }
diff --git a/library/std/src/f64/soft.rs b/library/std/src/f64/soft.rs
index 1a68dfdc85c2f..2106a25aaa2c7 100644
--- a/library/std/src/f64/soft.rs
+++ b/library/std/src/f64/soft.rs
@@ -198,7 +198,7 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
         // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`.
         let q = (aa / bb) as Bits;
         q as FP
-    } else {
+    } else if exp <= FP::MAX_EXP {
         // exp >= Bits::BITS
         let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
         let bb = DoubleBits::from(b.mantissa);
@@ -252,6 +252,10 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
             q | 1
         };
         q_adj as FP * pow2(e)
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
     }
 }
 
@@ -277,7 +281,7 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
         //   <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX
         let q = ((aa - 1) / bb) as Bits + 1;
         q as FP
-    } else {
+    } else if exp <= FP::MAX_EXP {
         let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1);
         let bb = DoubleBits::from(b.mantissa);
         // e > 0
@@ -340,5 +344,9 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP {
             q + 1
         };
         q_adj as FP * pow2(e)
+    } else {
+        // a.mantissa / b.mantissa >= 1/2, hence
+        // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP
+        FP::INFINITY
     }
 }