From 3deff8419610470146cbffc8210116cf7e7f9509 Mon Sep 17 00:00:00 2001 From: Tomek Czajka Date: Tue, 10 Dec 2024 15:03:01 +0100 Subject: [PATCH 1/5] Correctly rounded floating point `div_euclid`. Fixes #107904. --- library/std/src/f128.rs | 33 ++-- library/std/src/f128/div_euclid.rs | 204 +++++++++++++++++++++++ library/std/src/f128/div_euclid/tests.rs | 23 +++ library/std/src/f128/tests.rs | 117 ++++++++++++- library/std/src/f128/u256.rs | 63 +++++++ library/std/src/f128/u256/tests.rs | 39 +++++ library/std/src/f16.rs | 32 ++-- library/std/src/f16/div_euclid.rs | 111 ++++++++++++ library/std/src/f16/div_euclid/tests.rs | 13 ++ library/std/src/f16/tests.rs | 61 +++++++ library/std/src/f32.rs | 32 ++-- library/std/src/f32/div_euclid.rs | 204 +++++++++++++++++++++++ library/std/src/f32/div_euclid/tests.rs | 21 +++ library/std/src/f32/tests.rs | 105 ++++++++++++ library/std/src/f64.rs | 32 ++-- library/std/src/f64/div_euclid.rs | 204 +++++++++++++++++++++++ library/std/src/f64/div_euclid/tests.rs | 21 +++ library/std/src/f64/tests.rs | 116 +++++++++++++ library/std/src/lib.rs | 1 + 19 files changed, 1367 insertions(+), 65 deletions(-) create mode 100644 library/std/src/f128/div_euclid.rs create mode 100644 library/std/src/f128/div_euclid/tests.rs create mode 100644 library/std/src/f128/u256.rs create mode 100644 library/std/src/f128/u256/tests.rs create mode 100644 library/std/src/f16/div_euclid.rs create mode 100644 library/std/src/f16/div_euclid/tests.rs create mode 100644 library/std/src/f32/div_euclid.rs create mode 100644 library/std/src/f32/div_euclid/tests.rs create mode 100644 library/std/src/f64/div_euclid.rs create mode 100644 library/std/src/f64/div_euclid/tests.rs diff --git a/library/std/src/f128.rs b/library/std/src/f128.rs index e93e915159e40..56f3df1b4aa95 100644 --- a/library/std/src/f128.rs +++ b/library/std/src/f128.rs @@ -4,6 +4,9 @@ //! //! Mathematically significant numbers are provided in the `consts` sub-module. +mod div_euclid; +mod u256; + #[cfg(test)] mod tests; @@ -235,10 +238,14 @@ impl f128 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -264,29 +271,23 @@ impl f128 { #[unstable(feature = "f128", issue = "116909")] #[must_use = "method returns a new number and does not mutate the original value"] pub fn div_euclid(self, rhs: f128) -> f128 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + div_euclid::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f128/div_euclid.rs b/library/std/src/f128/div_euclid.rs new file mode 100644 index 0000000000000..38334ac674aaf --- /dev/null +++ b/library/std/src/f128/div_euclid.rs @@ -0,0 +1,204 @@ +#[cfg(test)] +mod tests; + +use crate::f128::u256::U256; + +/// Software implementation of `f128::div_euclid`. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: f128, b: f128) -> f128 { + if let Some((a_neg, a_exp, a_m)) = normal_form(a) + && let Some((b_neg, b_exp, b_m)) = normal_form(b) + { + let exp = a_exp - b_exp; + match (a_neg, b_neg) { + (false, false) => div_floor(exp, a_m, b_m), + (true, false) => -div_ceil(exp, a_m, b_m), + (false, true) => -div_floor(exp, a_m, b_m), + (true, true) => div_ceil(exp, a_m, b_m), + } + } else { + // `a` or `b` are +-0.0 or infinity or NaN. + // `a / b` is also +-0.0 or infinity or NaN. + // There is no need to round to an integer. + a / b + } +} + +/// Returns `floor((a << exp) / b)`. +/// +/// Requires `2^112 <= a, b < 2^113`. +fn div_floor(exp: i32, a: u128, b: u128) -> f128 { + if exp < 0 { + 0.0 + } else if exp <= 15 { + // aa < (2^113 << 15) = 2^128 + let aa = a << exp; + // q < 2^128 / 2^112 = 2^16 + let q = (aa / b) as u32; + // We have to use `as` because `From for f128` is not yet implemented. + q as f128 + } else if exp <= 127 { + // aa = a << exp + // aa < (2^113 << 127) = 2^240 + let aa = U256::shl_u128(a, exp as u32); + // q < 2^240 / 2^112 = 2^128 + let (q, _) = aa.div_rem(b); + q as f128 + } else { + // aa >= (2^112 << 127) = 2^239 + // aa < (2^113 << 127) = 2^240 + let aa = U256::shl_u128(a, 127); + // e > 0 + // The result is floor((aa << e) / b). + let e = (exp - 127) as u32; + + // aa = q * b + r + // q >= 2^239 / 2^113 = 2^126 + // q < 2^239 / 2^112 = 2^128 + // 0 <= r < b + let (q, r) = aa.div_rem(b); + + // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b) + // 0 <= (r << e) / b < 2^e + // + // There are two cases: + // 1. floor((r << e) / b) = 0 + // 2. 0 < floor((r << e) / b) < 2^e + // + // In case 1: + // The result is q << e. + // + // In case 2: + // The result is (q << e) + non-zero low e bits. + // This rounds the same way as (q | 1) << e because rounding beyond + // the 25 most significant bits of q depends only on whether the low-order + // bits are non-zero. + // + // Case 1 happens when: + // (r << e) / b < 1 + // (r << e) <= b - 1 + // r <= ((b - 1) >> e) + let case_1_bound = if e < 128 { (b - 1) >> e } else { 0 }; + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + q | 1 + }; + q_adj as f128 * pow2(e) + } +} + +/// Returns `ceil((a << exp) / b)`. +/// +/// Requires `2^112 <= a, b < 2^113`. +fn div_ceil(exp: i32, a: u128, b: u128) -> f128 { + if exp < 0 { + 1.0 + } else if exp <= 15 { + // aa < (2^113 << 15) = 2^128 + let aa = a << exp; + // q < 2^128 / 2^112 + 1 = 2^16 + 1 + let q = ((aa - 1) / b) as u32 + 1; + // We have to use `as` because `From for f128` is not yet implemented. + q as f128 + } else if exp <= 127 { + // aa = a << exp + // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127 + let aa = U256::shl_u128(a, exp as u32); + // q <= (2^240 - 2^127) / 2^112 + 1 = 2^128 - 2^15 + 1 + let (q, _) = (aa - U256::ONE).div_rem(b); + (q + 1) as f128 + } else { + // aa >= (2^112 << 127) = 2^239 + // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127 + let aa = U256::shl_u128(a, 127); + // e > 0 + // The result is ceil((aa << e) / b). + let e = (exp - 127) as u32; + + // aa = q * b + r + // q >= 2^239 / 2^112 = 2^126 + // q <= (2^240 - 2^127) / 2^112 = 2^128 - 2^15 + // 0 <= r < b + let (q, r) = aa.div_rem(b); + + // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b) + // 0 <= (r << e) / b < 2^e + // + // There are three cases: + // 1. ceil((r << e) / b) = 0 + // 2. 0 < ceil((r << e) / b) < 2^e + // 3. ceil((r << e) / b) = 2^e + // + // In case 1: + // The result is q << e. + // + // In case 2: + // The result is (q << e) + non-zero low e bits. + // This rounds the same way as (q | 1) << e because rounding beyond + // the 54 most significant bits of q depends only on whether the low-order + // bits are non-zero. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / b > (1 << e) - 1 + // (r << e) > (b << e) - b + // ((b - r) << e) <= b - 1 + // b - r <= (b - 1) >> e + // r >= b - ((b - 1) >> e) + let case_3_bound = b - if e < 128 { (b - 1) >> e } else { 0 }; + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + q | 1 + } else { + // Case 3. + q + 1 + }; + q_adj as f128 * pow2(e) + } +} + +/// For finite, non-zero numbers returns (sign, exponent, mantissa). +/// +/// `x = (-1)^sign * 2^exp * mantissa` +/// +/// `2^112 <= mantissa < 2^113` +fn normal_form(x: f128) -> Option<(bool, i32, u128)> { + let bits = x.to_bits(); + let sign = bits >> 127 != 0; + let biased_exponent = (bits >> 112 & 0x7fff) as i32; + let significand = bits & ((1 << 112) - 1); + match biased_exponent { + 0 if significand == 0 => { + // 0.0 + None + } + 0 => { + // Subnormal number: 2^(-16382-112) * significand. + // We want mantissa to have exactly 15 leading zeros. + let shift = significand.leading_zeros() - 15; + Some((sign, -16382 - 112 - shift as i32, significand << shift)) + } + 0x7fff => { + // Infinity or NaN. + None + } + _ => { + // Normal number: 2^(biased_exponent-16383-112) * (2^112 + significand) + Some((sign, biased_exponent - 16383 - 112, 1 << 112 | significand)) + } + } +} + +/// Returns `2^exp`. +fn pow2(exp: u32) -> f128 { + if exp <= 16383 { f128::from_bits(u128::from(exp + 16383) << 112) } else { f128::INFINITY } +} diff --git a/library/std/src/f128/div_euclid/tests.rs b/library/std/src/f128/div_euclid/tests.rs new file mode 100644 index 0000000000000..a89004bf10ea8 --- /dev/null +++ b/library/std/src/f128/div_euclid/tests.rs @@ -0,0 +1,23 @@ +#![cfg(reliable_f128_math)] + +#[test] +fn test_normal_form() { + use crate::f128::div_euclid::normal_form; + + assert_eq!(normal_form(-1.5f128), Some((true, -112, 3 << 111))); + assert_eq!(normal_form(f128::MIN_POSITIVE), Some((false, -16494, 1 << 112))); + assert_eq!(normal_form(f128::MIN_POSITIVE / 2.0), Some((false, -16495, 1 << 112))); + assert_eq!(normal_form(f128::MAX), Some((false, 16271, (1 << 113) - 1))); + assert_eq!(normal_form(0.0), None); + assert_eq!(normal_form(f128::INFINITY), None); + assert_eq!(normal_form(f128::NAN), None); +} + +#[test] +fn test_pow2() { + use crate::f128::div_euclid::pow2; + + assert_eq!(pow2(0), 1.0f128); + assert_eq!(pow2(4), 16.0f128); + assert_eq!(pow2(16384), f128::INFINITY); +} diff --git a/library/std/src/f128/tests.rs b/library/std/src/f128/tests.rs index cbcf9f96239bb..299d74d19b005 100644 --- a/library/std/src/f128/tests.rs +++ b/library/std/src/f128/tests.rs @@ -460,7 +460,122 @@ fn test_mul_add() { } #[test] -#[cfg(reliable_f16_math)] +#[cfg(reliable_f128_math)] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f128::NAN; + let inf = f128::INFINITY; + let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f128.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f128.div_euclid(0.0), inf); + assert_eq!((-5.0f128).div_euclid(0.0), -inf); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f128.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f128.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f128).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f128.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f128).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f128.div_euclid(10.0), 2.0); + assert_eq!((-20.0f128).div_euclid(10.0), -2.0); + assert_eq!(20.0f128.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f128).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f128.div_euclid(10.0), 2.0); + assert_eq!((-23.0f128).div_euclid(10.0), -3.0); + assert_eq!(23.0f128.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f128).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Medium results. + assert_eq!(1e9f128.div_euclid(10.0), 1e8); + assert_eq!((-1e9f128).div_euclid(10.0), -1e8); + assert_eq!(1e9f128.div_euclid(-10.0), -1e8); + assert_eq!((-1e9f128).div_euclid(-10.0), 1e8); + + assert_eq!(1_000_000_003.0f128.div_euclid(10.0), 100_000_000.0); + assert_eq!((-1_000_000_003.0f128).div_euclid(10.0), -100_000_001.0); + assert_eq!(1_000_000_003.0f128.div_euclid(-10.0), -100_000_000.0); + assert_eq!((-1_000_000_003.0f128).div_euclid(-10.0), 100_000_001.0); + + // Infinite results. + assert_eq!(1e4000f128.div_euclid(1e-4000), inf); + assert_eq!((-1e4000f128).div_euclid(1e-4000), -inf); + assert_eq!(1e4000f128.div_euclid(-1e-4000), -inf); + assert_eq!((-1e4000f128).div_euclid(-1e-4000), inf); + + // Large results, exactly divisible. + assert_eq!(1e4000f128.div_euclid(2.0), 0.5e4000); + assert_eq!((-1e4000f128).div_euclid(2.0), -0.5e4000); + assert_eq!(1e4000f128.div_euclid(-2.0), -0.5e4000); + assert_eq!((-1e4000f128).div_euclid(-2.0), 0.5e4000); + + // Large results, not divisible. + let a = 4281743078117879643174857908348485409148239872f128; // 3 * 2^150 + let b = 7f128; + // a / b = 0x1b6db6db6db6db6db6db6db6db6db6db6db6db.6d... + // = 611677582588268520453551129764069344164034267.42... + // floor(a / b) = 0x1b6db6db6db6db6db6db6db6db6db6db6db6db + // ceil(a / b) = 0x1b6db6db6db6db6db6db6db6db6db6db6db6dc + // Both round to 0x1b6db6db6db6db6db6db6db6db6db000000000 + // = 611677582588268520453551129764069314712829952 + assert_eq!(a.div_euclid(b), 611677582588268520453551129764069314712829952f128); + assert_eq!((-a).div_euclid(-b), 611677582588268520453551129764069314712829952f128); + + // Large results, test precise rounding edge cases. + + // a = 0x3abab02be95086e38544e68b3c78a0000000000000000000000000000000000 + // b = 0x10d3b8a2cc1076df4f04ec48a0a4e + let a = 1660249666442741415340463590408472308581124489457651197969883177321645473792f128; + let b = 5460685408006286130247220436011598f128; + + // a / b = 0x37d7bea0c08a6e5c00f664cda8aa1000000.be... + // = 304036863945418882481381401039785785556992.74... + // floor(a / b) = 0x37d7bea0c08a6e5c00f664cda8aa1000000 + // Rounds down to 0x37d7bea0c08a6e5c00f664cda8aa0000000 + // = 304036863945418882481381401039785785556992 + assert_eq!(a.div_euclid(b), 304036863945418882481381401039785785556992f128); + + // ceil(a / b) = 0x37d7bea0c08a6e5c00f664cda8aa1000001 + // Rounds up to 0x37d7bea0c08a6e5c00f664cda8aa2000000 + // = 304036863945418882481381401039785802334208 + assert_eq!((-a).div_euclid(-b), 304036863945418882481381401039785802334208f128); + + // a = 0x36625c03bcfd7d822e780b258e6ec0000000000000000000000000000000000 + // b = 0x166d7391fbd4d59a891d674eb11f3 + let a = 1537417493580712628196032571713889096389680551356671222166298628797042262016f128; + let b = 7278154372064079194208203952558579f128; + + // a / b = 0x26cc5433ff626fe2bbc14362e154affffff.5f... + // = 211237274587334998320332278178386764890111.37... + // floor(a / b) = 0x26cc5433ff626fe2bbc14362e154affffff + // Rounds down to 0x26cc5433ff626fe2bbc14362e154a000000 + // = 211237274587334998320332278178386748112896 + assert_eq!(a.div_euclid(b), 211237274587334998320332278178386748112896f128); + + // ceil(a / b) = 0x26cc5433ff626fe2bbc14362e154b000000 + // Rounds up to 0x26cc5433ff626fe2bbc14362e154c000000 + // = 211237274587334998320332278178386781667328 + assert_eq!((-a).div_euclid(-b), 211237274587334998320332278178386781667328f128); +} + +#[test] +#[cfg(reliable_f128_math)] fn test_recip() { let nan: f128 = f128::NAN; let inf: f128 = f128::INFINITY; diff --git a/library/std/src/f128/u256.rs b/library/std/src/f128/u256.rs new file mode 100644 index 0000000000000..5c812437acbbe --- /dev/null +++ b/library/std/src/f128/u256.rs @@ -0,0 +1,63 @@ +//! Very limited 256-bit arithmetic. + +#[cfg(test)] +mod tests; + +use core::ops::{Sub, SubAssign}; + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub(crate) struct U256 { + high: u128, + low: u128, +} + +impl U256 { + pub(crate) const ONE: Self = U256 { high: 0, low: 1 }; + + /// `x << shift`. + /// + /// Requires `0 <= shift < 128`. + pub(crate) fn shl_u128(x: u128, shift: u32) -> Self { + Self { low: x << shift, high: if shift != 0 { x >> (128 - shift) } else { 0 } } + } + + /// The number of leading 0 bits. + pub(crate) fn leading_zeros(self) -> u32 { + if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() } + } + + /// Returns (quotient, remainder). + /// + /// Requires `self < (rhs << 128)` + pub(crate) fn div_rem(mut self, rhs: u128) -> (u128, u128) { + let rhs_len = 128 - rhs.leading_zeros(); + let self_len = 256 - self.leading_zeros(); + // self < (rhs << shift_limit) + let shift_limit = (self_len + 1).saturating_sub(rhs_len).min(128); + let mut q = 0; + for shift in (0..shift_limit).rev() { + let rhs_shifted = Self::shl_u128(rhs, shift); + if self >= rhs_shifted { + q |= 1 << shift; + self -= rhs_shifted; + } + } + (q, self.low) + } +} + +impl Sub for U256 { + type Output = Self; + + fn sub(self, rhs: U256) -> Self { + let (low, borrow) = self.low.overflowing_sub(rhs.low); + let (high, _) = self.high.borrowing_sub(rhs.high, borrow); + Self { low, high } + } +} + +impl SubAssign for U256 { + fn sub_assign(&mut self, rhs: U256) { + *self = *self - rhs; + } +} diff --git a/library/std/src/f128/u256/tests.rs b/library/std/src/f128/u256/tests.rs new file mode 100644 index 0000000000000..15ed4981eb9b4 --- /dev/null +++ b/library/std/src/f128/u256/tests.rs @@ -0,0 +1,39 @@ +use crate::f128::u256::U256; + +#[test] +fn test_shl_u128() { + let a = 0x0123456789abcdef0123456789abcdef; + + let b = U256::shl_u128(a, 0); + assert_eq!(b.high, 0); + assert_eq!(b.low, a); + + let b = U256::shl_u128(a, 40); + assert_eq!(b.high, 0x0123456789); + assert_eq!(b.low, 0xabcdef0123456789abcdef0000000000); +} + +#[test] +fn test_leading_zeros() { + assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256); + assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116); + assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252); +} + +#[test] +fn test_sub() { + let a = U256 { high: 7, low: 10 }; + let b = U256 { high: 3, low: 2 }; + let c = U256 { high: 3, low: 15 }; + assert_eq!(a - b, U256 { high: 4, low: 8 }); + assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 }); +} + +#[test] +fn test_div_rem() { + let a = U256 { high: 0, low: 7 }; + let b = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef }; + let c = 0x9876543210abc; + assert_eq!(a.div_rem(c), (0, 7)); + assert_eq!(b.div_rem(c), (0x1207a42fc2c725f9cf302ff31d, 0x7c11e593922a3)); +} diff --git a/library/std/src/f16.rs b/library/std/src/f16.rs index 5b7fcaa28e064..062de8d6ef622 100644 --- a/library/std/src/f16.rs +++ b/library/std/src/f16.rs @@ -4,6 +4,8 @@ //! //! Mathematically significant numbers are provided in the `consts` sub-module. +mod div_euclid; + #[cfg(test)] mod tests; @@ -235,10 +237,14 @@ impl f16 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -264,29 +270,23 @@ impl f16 { #[unstable(feature = "f16", issue = "116909")] #[must_use = "method returns a new number and does not mutate the original value"] pub fn div_euclid(self, rhs: f16) -> f16 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + div_euclid::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f16/div_euclid.rs b/library/std/src/f16/div_euclid.rs new file mode 100644 index 0000000000000..d1872f00f191c --- /dev/null +++ b/library/std/src/f16/div_euclid.rs @@ -0,0 +1,111 @@ +#[cfg(test)] +mod tests; + +/// Software implementation of `f16::div_euclid`. +#[allow(dead_code)] // f16::div_euclid is cfg(not(test)) +pub(crate) fn div_euclid(a: f16, b: f16) -> f16 { + if let Some((a_neg, a_exp, a_m)) = normal_form(a) + && let Some((b_neg, b_exp, b_m)) = normal_form(b) + { + let exp = a_exp - b_exp; + match (a_neg, b_neg) { + (false, false) => div_floor(exp, a_m, b_m), + (true, false) => -div_ceil(exp, a_m, b_m), + (false, true) => -div_floor(exp, a_m, b_m), + (true, true) => div_ceil(exp, a_m, b_m), + } + } else { + // `a` or `b` are +-0.0 or infinity or NaN. + // `a / b` is also +-0.0 or infinity or NaN. + // There is no need to round to an integer. + a / b + } +} + +/// Returns `floor((a << exp) / b)`. +/// +/// Requires `2^10 <= a, b < 2^11`. +fn div_floor(exp: i16, a: u16, b: u16) -> f16 { + if exp < 0 { + 0.0 + } else if exp <= 5 { + // aa < (2^11 << 5) = 2^16 + let aa = a << exp; + // q < 2^16 / 2^10 = 2^6 + let q = (aa / b) as u8; + // We have to use `as` because `From for f16` is not yet implemented. + q as f16 + } else if exp <= 16 { + // aa < (2^11 << 16) = 2^27 + let aa = u32::from(a) << exp; + let bb = u32::from(b); + // q < 2^27 / 2^10 = 2^17 + let q = aa / bb; + q as f16 + } else { + // exp >= 17 + // result >= (2^10 << 17) / 2^11 = 2^16 + // Exponent 16 is too large to represent in f16. + f16::INFINITY + } +} + +/// Returns `ceil((a << exp) / b)`. +/// +/// Requires `2^10 <= a, b < 2^11`. +fn div_ceil(exp: i16, a: u16, b: u16) -> f16 { + if exp < 0 { + 1.0 + } else if exp <= 5 { + // aa < (2^11 << 5) = 2^16 + let aa = a << exp; + // q < 2^16 / 2^10 + 1 = 2^6 + 1 + let q = ((aa - 1) / b) as u8 + 1; + // We have to use `as` because `From for f16` is not yet implemented. + q as f16 + } else if exp <= 16 { + // aa <= ((2^11 - 1) << 16) = 2^27 - 2^16 + let aa = u32::from(a) << exp; + let bb = u32::from(b); + // q <= (2^27 - 2^16) / 2^10 + 1 = 2^17 - 2^6 + 1 + let q = ((aa - 1) / bb) as u16 + 1; + q as f16 + } else { + // exp >= 17 + // result >= (2^10 << 17) / 2^11 = 2^16 + // Exponent 16 is too large to represent in f16. + f16::INFINITY + } +} + +/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`. +/// +/// `x = (-1)^sign * 2^exp * mantissa` +/// +/// `2^10 <= mantissa < 2^11` +fn normal_form(x: f16) -> Option<(bool, i16, u16)> { + let bits = x.to_bits(); + let sign = bits >> 15 != 0; + let biased_exponent = (bits >> 10 & 0x1f) as i16; + let significand = bits & 0x3ff; + match biased_exponent { + 0 if significand == 0 => { + // 0.0 + None + } + 0 => { + // Subnormal number: 2^(-14-10) * significand. + // We want mantissa to have exactly 5 leading zeros. + let shift = significand.leading_zeros() - 5; + Some((sign, -14 - 10 - shift as i16, significand << shift)) + } + 0x1f => { + // Infinity or NaN. + None + } + _ => { + // Normal number: 2^(biased_exponent-15-10) * (2^10 + significand) + Some((sign, biased_exponent - 15 - 10, 1 << 10 | significand)) + } + } +} diff --git a/library/std/src/f16/div_euclid/tests.rs b/library/std/src/f16/div_euclid/tests.rs new file mode 100644 index 0000000000000..e428f16c98d66 --- /dev/null +++ b/library/std/src/f16/div_euclid/tests.rs @@ -0,0 +1,13 @@ +#[cfg(reliable_f16_math)] +#[test] +fn test_normal_form() { + use crate::f16::div_euclid::normal_form; + + assert_eq!(normal_form(-1.5f16), Some((true, -10, 0x600))); + assert_eq!(normal_form(f16::MIN_POSITIVE), Some((false, -24, 0x400))); + assert_eq!(normal_form(f16::MIN_POSITIVE / 2.0), Some((false, -25, 0x400))); + assert_eq!(normal_form(f16::MAX), Some((false, 5, 0x7ff))); + assert_eq!(normal_form(0.0), None); + assert_eq!(normal_form(f16::INFINITY), None); + assert_eq!(normal_form(f16::NAN), None); +} diff --git a/library/std/src/f16/tests.rs b/library/std/src/f16/tests.rs index 684ee3f3855b8..4fc5f5d3dec09 100644 --- a/library/std/src/f16/tests.rs +++ b/library/std/src/f16/tests.rs @@ -442,6 +442,67 @@ fn test_mul_add() { assert_eq!((-3.2f16).mul_add(2.4, neg_inf), neg_inf); } +#[test] +#[cfg(reliable_f16_math)] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f16::NAN; + let inf = f16::INFINITY; + let largest_subnormal = f16::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f16.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f16.div_euclid(0.0), inf); + assert_eq!((-5.0f16).div_euclid(0.0), -inf); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f16.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f16.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f16).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f16.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f16).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f16.div_euclid(10.0), 2.0); + assert_eq!((-20.0f16).div_euclid(10.0), -2.0); + assert_eq!(20.0f16.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f16).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f16.div_euclid(10.0), 2.0); + assert_eq!((-23.0f16).div_euclid(10.0), -3.0); + assert_eq!(23.0f16.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f16).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Large results, exactly divisible. + assert_eq!(1e4f16.div_euclid(2.0), 0.5e4); + assert_eq!((-1e4f16).div_euclid(2.0), -0.5e4); + assert_eq!(1e4f16.div_euclid(-2.0), -0.5e4); + assert_eq!((-1e4f16).div_euclid(-2.0), 0.5e4); + + // Large results, not divisible. + assert_eq!(1003.0f16.div_euclid(10.0), 100.0); + assert_eq!((-1003.0f16).div_euclid(10.0), -101.0); + assert_eq!(1003.0f16.div_euclid(-10.0), -100.0); + assert_eq!((-1003.0f16).div_euclid(-10.0), 101.0); + + // Infinite results. + assert_eq!(1000f16.div_euclid(0.001f16), inf); + assert_eq!((-1000f16).div_euclid(0.001f16), -inf); + assert_eq!(1000f16.div_euclid(-0.001f16), -inf); + assert_eq!((-1000f16).div_euclid(-0.001f16), inf); +} + #[test] #[cfg(reliable_f16_math)] fn test_recip() { diff --git a/library/std/src/f32.rs b/library/std/src/f32.rs index 7cb285bbff5f7..516f6ef0fac3b 100644 --- a/library/std/src/f32.rs +++ b/library/std/src/f32.rs @@ -12,6 +12,8 @@ #![stable(feature = "rust1", since = "1.0.0")] #![allow(missing_docs)] +mod div_euclid; + #[cfg(test)] mod tests; @@ -219,10 +221,14 @@ impl f32 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -244,29 +250,23 @@ impl f32 { #[inline] #[stable(feature = "euclidean_division", since = "1.38.0")] pub fn div_euclid(self, rhs: f32) -> f32 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + div_euclid::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f32/div_euclid.rs b/library/std/src/f32/div_euclid.rs new file mode 100644 index 0000000000000..6040b82d26cb9 --- /dev/null +++ b/library/std/src/f32/div_euclid.rs @@ -0,0 +1,204 @@ +#[cfg(test)] +mod tests; + +/// Software implementation of `f32::div_euclid`. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: f32, b: f32) -> f32 { + if let Some((a_neg, a_exp, a_m)) = normal_form(a) + && let Some((b_neg, b_exp, b_m)) = normal_form(b) + { + let exp = a_exp - b_exp; + match (a_neg, b_neg) { + (false, false) => div_floor(exp, a_m, b_m), + (true, false) => -div_ceil(exp, a_m, b_m), + (false, true) => -div_floor(exp, a_m, b_m), + (true, true) => div_ceil(exp, a_m, b_m), + } + } else { + // `a` or `b` are +-0.0 or infinity or NaN. + // `a / b` is also +-0.0 or infinity or NaN. + // There is no need to round to an integer. + a / b + } +} + +/// Returns `floor((a << exp) / b)`. +/// +/// Requires `2^23 <= a, b < 2^24` +fn div_floor(exp: i32, a: u32, b: u32) -> f32 { + if exp < 0 { + 0.0 + } else if exp <= 8 { + // aa < (2^24 << 8) = 2^32 + let aa = a << exp; + // q < 2^32 / 2^23 = 2^9 + let q = (aa / b) as u16; + q.into() + } else if exp <= 31 { + // aa < (2^24 << 31) = 2^55 + let aa = u64::from(a) << exp; + let bb = u64::from(b); + // q < 2^55 / 2^23 = 2^32 + let q = (aa / bb) as u32; + q as f32 + } else { + // aa >= (2^23 << 31) = 2^54 + // aa < (2^24 << 31) = 2^55 + let aa = u64::from(a) << 31; + let bb = u64::from(b); + // e > 0 + // The result is floor((aa << e) / b). + let e = (exp - 31) as u32; + + // aa = q * b + r + // q >= 2^54 / 2^24 = 2^30 + // q < 2^55 / 2^23 = 2^32 + let q = (aa / bb) as u32; + // 0 <= r < b + let r = (aa % bb) as u32; + + // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b) + // 0 <= (r << e) / b < 2^e + // + // There are two cases: + // 1. floor((r << e) / b) = 0 + // 2. 0 < floor((r << e) / b) < 2^e + // + // In case 1: + // The result is q << e. + // + // In case 2: + // The result is (q << e) + non-zero low e bits. + // This rounds the same way as (q | 1) << e because rounding beyond + // the 25 most significant bits of q depends only on whether the low-order + // bits are non-zero. + // + // Case 1 happens when: + // (r << e) / b < 1 + // (r << e) <= b - 1 + // r <= ((b - 1) >> e) + let case_1_bound = if e < 32 { (b - 1) >> e } else { 0 }; + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + q | 1 + }; + q_adj as f32 * pow2(e) + } +} + +/// Returns ceil((a << exp) / b). +/// +/// Requires `2^23 <= a, b < 2^24` +fn div_ceil(exp: i32, a: u32, b: u32) -> f32 { + if exp < 0 { + 1.0 + } else if exp <= 8 { + // aa < (2^24 << 8) = 2^32 + let aa = a << exp; + // q < 2^32 / 2^23 + 1 = 2^9 + 1 + let q = ((aa - 1) / b) as u16 + 1; + q.into() + } else if exp <= 31 { + // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31 + let aa = u64::from(a) << exp; + let bb = u64::from(b); + // q <= (2^55 - 2^31) / 2^23 + 1 = 2^32 - 2^8 + 1 + let q = ((aa - 1) / bb) as u32 + 1; + q as f32 + } else { + // aa >= (2^23 << 31) = 2^54 + // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31 + let aa = u64::from(a) << 31; + let bb = u64::from(b); + // e > 0 + // The result is ceil((aa << e) / b). + let e = (exp - 31) as u32; + + // aa = q * b + r + // q >= 2^54 / 2^24 = 2^30 + // q <= (2^55 - 2^31) / 2^23 = 2^32 - 2^8 + let q = (aa / bb) as u32; + // 0 <= r < b + let r = (aa % bb) as u32; + + // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b) + // 0 <= (r << e) / b < 2^e + // + // There are three cases: + // 1. ceil((r << e) / b) = 0 + // 2. 0 < ceil((r << e) / b) < 2^e + // 3. ceil((r << e) / b) = 2^e + // + // In case 1: + // The result is q << e. + // + // In case 2: + // The result is (q << e) + non-zero low e bits. + // This rounds the same way as (q | 1) << e because rounding beyond + // the 25 most significant bits of q depends only on whether the low-order + // bits are non-zero. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / b > (1 << e) - 1 + // (r << e) > (b << e) - b + // ((b - r) << e) <= b - 1 + // b - r <= (b - 1) >> e + // r >= b - ((b - 1) >> e) + let case_3_bound = b - if e < 32 { (b - 1) >> e } else { 0 }; + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + q | 1 + } else { + // Case 3. + q + 1 + }; + q_adj as f32 * pow2(e) + } +} + +/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`. +/// +/// `x = (-1)^sign * 2^exp * mantissa` +/// +/// `2^23 <= mantissa < 2^24` +fn normal_form(x: f32) -> Option<(bool, i32, u32)> { + let bits = x.to_bits(); + let sign = bits >> 31 != 0; + let biased_exponent = (bits >> 23 & 0xff) as i32; + let significand = bits & 0x7fffff; + match biased_exponent { + 0 if significand == 0 => { + // 0.0 + None + } + 0 => { + // Subnormal number: 2^(-126-23) * significand. + // We want mantissa to have exactly 8 leading zeros. + let shift = significand.leading_zeros() - 8; + Some((sign, -126 - 23 - shift as i32, significand << shift)) + } + 0xff => { + // Infinity or NaN. + None + } + _ => { + // Normal number: 2^(biased_exponent-127-23) * (2^23 + significand) + Some((sign, biased_exponent - 127 - 23, 1 << 23 | significand)) + } + } +} + +/// Returns `2^exp`. +fn pow2(exp: u32) -> f32 { + if exp <= 127 { f32::from_bits((exp + 127) << 23) } else { f32::INFINITY } +} diff --git a/library/std/src/f32/div_euclid/tests.rs b/library/std/src/f32/div_euclid/tests.rs new file mode 100644 index 0000000000000..e1c139f7e16ed --- /dev/null +++ b/library/std/src/f32/div_euclid/tests.rs @@ -0,0 +1,21 @@ +#[test] +fn test_normal_form() { + use crate::f32::div_euclid::normal_form; + + assert_eq!(normal_form(-1.5f32), Some((true, -23, 0xc00000))); + assert_eq!(normal_form(f32::MIN_POSITIVE), Some((false, -149, 0x800000))); + assert_eq!(normal_form(f32::MIN_POSITIVE / 2.0), Some((false, -150, 0x800000))); + assert_eq!(normal_form(f32::MAX), Some((false, 104, 0xffffff))); + assert_eq!(normal_form(0.0), None); + assert_eq!(normal_form(f32::INFINITY), None); + assert_eq!(normal_form(f32::NAN), None); +} + +#[test] +fn test_pow2() { + use crate::f32::div_euclid::pow2; + + assert_eq!(pow2(0), 1.0f32); + assert_eq!(pow2(4), 16.0f32); + assert_eq!(pow2(128), f32::INFINITY); +} diff --git a/library/std/src/f32/tests.rs b/library/std/src/f32/tests.rs index 99cfcfb231dad..895719432000e 100644 --- a/library/std/src/f32/tests.rs +++ b/library/std/src/f32/tests.rs @@ -423,6 +423,111 @@ fn test_mul_add() { assert_eq!((-3.2f32).mul_add(2.4, neg_inf), neg_inf); } +#[test] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f32::NAN; + let inf = f32::INFINITY; + let largest_subnormal = f32::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f32.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f32.div_euclid(0.0), inf); + assert_eq!((-5.0f32).div_euclid(0.0), -inf); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f32.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f32.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f32).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f32.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f32).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f32.div_euclid(10.0), 2.0); + assert_eq!((-20.0f32).div_euclid(10.0), -2.0); + assert_eq!(20.0f32.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f32).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f32.div_euclid(10.0), 2.0); + assert_eq!((-23.0f32).div_euclid(10.0), -3.0); + assert_eq!(23.0f32.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f32).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Medium results. + assert_eq!(1000000.0f32.div_euclid(10.0), 100000.0); + assert_eq!((-1000000.0f32).div_euclid(10.0), -100000.0); + assert_eq!(1000000.0f32.div_euclid(-10.0), -100000.0); + assert_eq!((-1000000.0f32).div_euclid(-10.0), 100000.0); + + assert_eq!(1000003.0f32.div_euclid(10.0), 100000.0); + assert_eq!((-1000003.0f32).div_euclid(10.0), -100001.0); + assert_eq!(1000003.0f32.div_euclid(-10.0), -100000.0); + assert_eq!((-1000003.0f32).div_euclid(-10.0), 100001.0); + + // Infinite results. + assert_eq!(1e30f32.div_euclid(1e-30), inf); + assert_eq!((-1e30f32).div_euclid(1e-30), -inf); + assert_eq!(1e30f32.div_euclid(-1e-30), -inf); + assert_eq!((-1e30f32).div_euclid(-1e-30), inf); + + // Large results, exactly divisible. + assert_eq!(1e30f32.div_euclid(2.0), 0.5e30); + assert_eq!((-1e30f32).div_euclid(2.0), -0.5e30); + assert_eq!(1e30f32.div_euclid(-2.0), -0.5e30); + assert_eq!((-1e30f32).div_euclid(-2.0), 0.5e30); + + // Large results, not divisible. + let a = 3377699720527872f32; // 3 * 2^50 + let b = 7f32; + // a / b = 0x1b6db6db6db6d.b6... = 482528531503981.71... + // floor(a / b) = 0x1b6db6db6db6d + // ceil(a / b) = 0x1b6db6db6db6e + // Both round to 0x1b6db6e000000 = 482528536297472 + assert_eq!(a.div_euclid(b), 482528536297472f32); + assert_eq!((-a).div_euclid(-b), 482528536297472f32); + + // Large results, test precise rounding edge cases. + + // a = 0x7dccd9000000000 = 566553671500824576 + // b = 0xed5c1c = 5555612 + let a = 566553671500824576f32; + let b = 15555612f32; + + // a / b = 0x87adf0800.c9... = 36421175296.78... + // floor(a / b) = 0x87adf0800 + // Rounds down to 0x87adf0000 = 36421173248 + assert_eq!(a.div_euclid(b), 36421173248f32); + + // ceil(a / b) = 0x87adf0801 + // Rounds up to 0x87adf1000 = 36421177344 + assert_eq!((-a).div_euclid(-b), 36421177344f32); + + // a = 0x7a83d5800000000 = 551758402519302144 + // b = 0x868b0f = 8817423 + let a = 551758402519302144f32; + let b = 8817423f32; + + // a / b = 0xe91d0d7ff.03... = 62575925247.01... + // floor(a / b) = 0xe91d0d7ff + // Rounds down to 0xe91d0d000 = 62575923200 + assert_eq!(a.div_euclid(b), 62575923200f32); + + // ceil(a / b) = 0xe91d0d800 + // Rounds up to 0xe91d0e000 = 62575927296 + assert_eq!((-a).div_euclid(-b), 62575927296f32); +} + #[test] fn test_recip() { let nan: f32 = f32::NAN; diff --git a/library/std/src/f64.rs b/library/std/src/f64.rs index 47163c272de32..fb17857ec6d94 100644 --- a/library/std/src/f64.rs +++ b/library/std/src/f64.rs @@ -12,6 +12,8 @@ #![stable(feature = "rust1", since = "1.0.0")] #![allow(missing_docs)] +mod div_euclid; + #[cfg(test)] mod tests; @@ -219,10 +221,14 @@ impl f64 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -244,29 +250,23 @@ impl f64 { #[inline] #[stable(feature = "euclidean_division", since = "1.38.0")] pub fn div_euclid(self, rhs: f64) -> f64 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + div_euclid::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f64/div_euclid.rs b/library/std/src/f64/div_euclid.rs new file mode 100644 index 0000000000000..f4346dd43afff --- /dev/null +++ b/library/std/src/f64/div_euclid.rs @@ -0,0 +1,204 @@ +#[cfg(test)] +mod tests; + +/// Software implementation of `f64::div_euclid`. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: f64, b: f64) -> f64 { + if let Some((a_neg, a_exp, a_m)) = normal_form(a) + && let Some((b_neg, b_exp, b_m)) = normal_form(b) + { + let exp = a_exp - b_exp; + match (a_neg, b_neg) { + (false, false) => div_floor(exp, a_m, b_m), + (true, false) => -div_ceil(exp, a_m, b_m), + (false, true) => -div_floor(exp, a_m, b_m), + (true, true) => div_ceil(exp, a_m, b_m), + } + } else { + // `a` or `b` are +-0.0 or infinity or NaN. + // `a / b` is also +-0.0 or infinity or NaN. + // There is no need to round to an integer. + a / b + } +} + +/// Returns `floor((a << exp) / b)`. +/// +/// Requires `2^52 <= a, b < 2^53`. +fn div_floor(exp: i32, a: u64, b: u64) -> f64 { + if exp < 0 { + 0.0 + } else if exp <= 11 { + // aa < (2^53 << 11) = 2^64 + let aa = a << exp; + // q < 2^64 / 2^52 = 2^12 + let q = (aa / b) as u32; + q.into() + } else if exp <= 63 { + // aa < (2^53 << 63) = 2^116 + let aa = u128::from(a) << exp; + let bb = u128::from(b); + // q < 2^116 / 2^52 = 2^64 + let q = (aa / bb) as u64; + q as f64 + } else { + // aa >= (2^52 << 63) = 2^115 + // aa < (2^53 << 63) = 2^116 + let aa = u128::from(a) << 63; + let bb = u128::from(b); + // e > 0 + // The result is floor((aa << e) / b). + let e = (exp - 63) as u32; + + // aa = q * b + r + // q >= 2^115 / 2^53 = 2^62 + // q < 2^116 / 2^52 = 2^64 + let q = (aa / bb) as u64; + // 0 <= r < b + let r = (aa % bb) as u64; + + // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b) + // 0 <= (r << e) / b < 2^e + // + // There are two cases: + // 1. floor((r << e) / b) = 0 + // 2. 0 < floor((r << e) / b) < 2^e + // + // In case 1: + // The result is q << e. + // + // In case 2: + // The result is (q << e) + non-zero low e bits. + // This rounds the same way as (q | 1) << e because rounding beyond + // the 25 most significant bits of q depends only on whether the low-order + // bits are non-zero. + // + // Case 1 happens when: + // (r << e) / b < 1 + // (r << e) <= b - 1 + // r <= ((b - 1) >> e) + let case_1_bound = if e < 64 { (b - 1) >> e } else { 0 }; + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + q | 1 + }; + q_adj as f64 * pow2(e) + } +} + +/// Returns `ceil((a << exp) / b)`. +/// +/// Requires `2^52 <= a, b < 2^53`. +fn div_ceil(exp: i32, a: u64, b: u64) -> f64 { + if exp < 0 { + 1.0 + } else if exp <= 11 { + // aa < (2^53 << 11) = 2^64 + let aa = a << exp; + // q < 2^64 / 2^52 + 1 = 2^12 + 1 + let q = ((aa - 1) / b) as u32 + 1; + q.into() + } else if exp <= 63 { + // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63 + let aa = u128::from(a) << exp; + let bb = u128::from(b); + // q <= (2^116 - 2^63) / 2^52 + 1 = 2^64 - 2^11 + 1 + let q = ((aa - 1) / bb) as u64 + 1; + q as f64 + } else { + // aa >= (2^52 << 63) = 2^115 + // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63 + let aa = u128::from(a) << 63; + let bb = u128::from(b); + // e > 0 + // The result is ceil((aa << e) / b). + let e = (exp - 63) as u32; + + // aa = q * b + r + // q >= 2^115 / 2^53 = 2^62 + // q <= (2^116 - 2^63) / 2^52 = 2^64 - 2^11 + let q = (aa / bb) as u64; + // 0 <= r < b + let r = (aa % bb) as u64; + + // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b) + // 0 <= (r << e) / b < 2^e + // + // There are three cases: + // 1. ceil((r << e) / b) = 0 + // 2. 0 < ceil((r << e) / b) < 2^e + // 3. ceil((r << e) / b) = 2^e + // + // In case 1: + // The result is q << e. + // + // In case 2: + // The result is (q << e) + non-zero low e bits. + // This rounds the same way as (q | 1) << e because rounding beyond + // the 54 most significant bits of q depends only on whether the low-order + // bits are non-zero. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / b > (1 << e) - 1 + // (r << e) > (b << e) - b + // ((b - r) << e) <= b - 1 + // b - r <= (b - 1) >> e + // r >= b - ((b - 1) >> e) + let case_3_bound = b - if e < 64 { (b - 1) >> e } else { 0 }; + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + q | 1 + } else { + // Case 3. + q + 1 + }; + q_adj as f64 * pow2(e) + } +} + +/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`. +/// +/// `x = (-1)^sign * 2^exp * mantissa` +/// +/// `2^52 <= mantissa < 2^53` +fn normal_form(x: f64) -> Option<(bool, i32, u64)> { + let bits = x.to_bits(); + let sign = bits >> 63 != 0; + let biased_exponent = (bits >> 52 & 0x7ff) as i32; + let significand = bits & ((1 << 52) - 1); + match biased_exponent { + 0 if significand == 0 => { + // 0.0 + None + } + 0 => { + // Subnormal number: 2^(-1022-52) * significand. + // We want mantissa to have exactly 11 leading zeros. + let shift = significand.leading_zeros() - 11; + Some((sign, -1022 - 52 - shift as i32, significand << shift)) + } + 0x7ff => { + // Infinity or NaN. + None + } + _ => { + // Normal number: 2^(biased_exponent-1023-52) * (2^52 + significand) + Some((sign, biased_exponent - 1023 - 52, 1 << 52 | significand)) + } + } +} + +/// Returns `2^exp`. +fn pow2(exp: u32) -> f64 { + if exp <= 1023 { f64::from_bits(u64::from(exp + 1023) << 52) } else { f64::INFINITY } +} diff --git a/library/std/src/f64/div_euclid/tests.rs b/library/std/src/f64/div_euclid/tests.rs new file mode 100644 index 0000000000000..eeb43a6b7d868 --- /dev/null +++ b/library/std/src/f64/div_euclid/tests.rs @@ -0,0 +1,21 @@ +#[test] +fn test_normal_form() { + use crate::f64::div_euclid::normal_form; + + assert_eq!(normal_form(-1.5f64), Some((true, -52, 3 << 51))); + assert_eq!(normal_form(f64::MIN_POSITIVE), Some((false, -1074, 1 << 52))); + assert_eq!(normal_form(f64::MIN_POSITIVE / 2.0), Some((false, -1075, 1 << 52))); + assert_eq!(normal_form(f64::MAX), Some((false, 971, (1 << 53) - 1))); + assert_eq!(normal_form(0.0), None); + assert_eq!(normal_form(f64::INFINITY), None); + assert_eq!(normal_form(f64::NAN), None); +} + +#[test] +fn test_pow2() { + use crate::f64::div_euclid::pow2; + + assert_eq!(pow2(0), 1.0f64); + assert_eq!(pow2(4), 16.0f64); + assert_eq!(pow2(1024), f64::INFINITY); +} diff --git a/library/std/src/f64/tests.rs b/library/std/src/f64/tests.rs index 3fac2efe0d76c..0499cc4516b4e 100644 --- a/library/std/src/f64/tests.rs +++ b/library/std/src/f64/tests.rs @@ -411,6 +411,122 @@ fn test_mul_add() { assert_eq!((-3.2f64).mul_add(2.4, neg_inf), neg_inf); } +#[test] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f64::NAN; + let inf = f64::INFINITY; + let largest_subnormal = f64::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f64.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f64.div_euclid(0.0), inf); + assert_eq!((-5.0f64).div_euclid(0.0), -inf); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f64.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f64.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f64).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f64.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f64).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f64.div_euclid(10.0), 2.0); + assert_eq!((-20.0f64).div_euclid(10.0), -2.0); + assert_eq!(20.0f64.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f64).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f64.div_euclid(10.0), 2.0); + assert_eq!((-23.0f64).div_euclid(10.0), -3.0); + assert_eq!(23.0f64.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f64).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Issue #107904 + let a = -0.7500000000000006f64; + let b = 0.0833333333333334f64; + assert_eq!(a.div_euclid(b), -9.0); + + let a = 46.11764705882354f64; + let b = 1.6470588235294124f64; + assert_eq!(a.div_euclid(b), 27.0); + + // Medium results. + assert_eq!(1000000.0f64.div_euclid(10.0), 100000.0); + assert_eq!((-1000000.0f64).div_euclid(10.0), -100000.0); + assert_eq!(1000000.0f64.div_euclid(-10.0), -100000.0); + assert_eq!((-1000000.0f64).div_euclid(-10.0), 100000.0); + + assert_eq!(1000003.0f64.div_euclid(10.0), 100000.0); + assert_eq!((-1000003.0f64).div_euclid(10.0), -100001.0); + assert_eq!(1000003.0f64.div_euclid(-10.0), -100000.0); + assert_eq!((-1000003.0f64).div_euclid(-10.0), 100001.0); + + // Large results, exactly divisible. + assert_eq!(1e300f64.div_euclid(2.0), 0.5e300); + assert_eq!((-1e300f64).div_euclid(2.0), -0.5e300); + assert_eq!(1e300f64.div_euclid(-2.0), -0.5e300); + assert_eq!((-1e300f64).div_euclid(-2.0), 0.5e300); + + // Infinite results. + assert_eq!(1e300f64.div_euclid(1e-300), inf); + assert_eq!((-1e300f64).div_euclid(1e-300), -inf); + assert_eq!(1e300f64.div_euclid(-1e-300), -inf); + assert_eq!((-1e300f64).div_euclid(-1e-300), inf); + + // Large results, not divisible. + let a = 3802951800684688204490109616128f64; // 3 * 2^100 + let b = 7f64; + // a / b = 0x6db6db6db6db6db6db6db6db6.db... + // = 543278828669241172070015659446.85... + // floor(a / b) = 0x6db6db6db6db6db6db6db6db6 + // ceil(a / b) = 0x6db6db6db6db6db6db6db6db7 + // Both round to 0x6db6db6db6db6c00000000000 + // = 543278828669241141911982440448 + assert_eq!(a.div_euclid(b), 543278828669241141911982440448f64); + assert_eq!((-a).div_euclid(-b), 543278828669241141911982440448f64); + + // Large results, test precise rounding edge cases. + + // a = 0xc3b75b9c77bfa80000000000000000 = 1016216826558977217020954221979107328 + // b = 0x130594ed7aaacb = 5354161755040459 + let a = 1016216826558977217020954221979107328f64; + let b = 5354161755040459f64; + + // a / b = 0xa49ff0421f8f94000.0a... = 189799425764882989056.03... + // floor(a / b) = 0xa49ff0421f8f94000 + // Rounds down to 0xa49ff0421f8f90000 = 189799425764882972672 + assert_eq!(a.div_euclid(b), 189799425764882972672f64); + + // ceil(a / b) = 0xa49ff0421f8f94001 + // Rounds up to 0xa49ff0421f8f98000 = 189799425764883005440 + assert_eq!((-a).div_euclid(-b), 189799425764883005440f64); + + // a = 0xee67640bd3b6880000000000000000 = 1237863666996996959508242859395383296 + // b = 0x15409d10e91300 = 5982017848677120 + // a / b = 0xb37bdd747bac8bfff.5b... = 206930787956565786623.35... + let a = 1237863666996996959508242859395383296f64; + let b = 5982017848677120f64; + + // floor(a / b) = 0xb37bdd747bac8bfff + // Rounds down to 0xb37bdd747bac88000 = 206930787956565770240 + assert_eq!(a.div_euclid(b), 206930787956565770240f64); + + // ceil(a / b) = 0xb37bdd747bac8c000 + // Rounds up to 0xb37bdd747bac90000 = + assert_eq!((-a).div_euclid(-b), 206930787956565803008f64); +} + #[test] fn test_recip() { let nan: f64 = f64::NAN; diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 49a0322003905..dbbfed619559d 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -321,6 +321,7 @@ // Library features (core): // tidy-alphabetical-start #![feature(array_chunks)] +#![feature(bigint_helper_methods)] #![feature(c_str_module)] #![feature(char_internals)] #![feature(clone_to_uninit)] From 4534005bc0bf868371c86fcdfb3b53f2549101f4 Mon Sep 17 00:00:00 2001 From: Tomek Czajka Date: Wed, 11 Dec 2024 09:43:08 +0100 Subject: [PATCH 2/5] Fix overflow bug in f16::div_ceil --- library/std/src/f16/div_euclid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/f16/div_euclid.rs b/library/std/src/f16/div_euclid.rs index d1872f00f191c..df770c3af1c25 100644 --- a/library/std/src/f16/div_euclid.rs +++ b/library/std/src/f16/div_euclid.rs @@ -68,7 +68,7 @@ fn div_ceil(exp: i16, a: u16, b: u16) -> f16 { let aa = u32::from(a) << exp; let bb = u32::from(b); // q <= (2^27 - 2^16) / 2^10 + 1 = 2^17 - 2^6 + 1 - let q = ((aa - 1) / bb) as u16 + 1; + let q = (aa - 1) / bb + 1; q as f16 } else { // exp >= 17 From cedc71861d2259aa62e46d9135e6b2dc7fd10fb2 Mon Sep 17 00:00:00 2001 From: Tomek Czajka Date: Wed, 11 Dec 2024 18:44:16 +0100 Subject: [PATCH 3/5] Software `div_euclid` refactoring. `f{16,32,64,128}/soft.rs` are now very similar. f32 and f64 almost identical. `U256` helper moved to `crate::u256`. `(-5f32).div_euclid(f32::INFINITY)` now returns -1 not 0. --- library/std/src/f128.rs | 5 +- library/std/src/f128/div_euclid.rs | 204 ------------- library/std/src/f128/div_euclid/tests.rs | 23 -- library/std/src/f128/soft.rs | 350 +++++++++++++++++++++++ library/std/src/f128/soft/tests.rs | 49 ++++ library/std/src/f128/tests.rs | 12 +- library/std/src/f128/u256.rs | 63 ---- library/std/src/f128/u256/tests.rs | 39 --- library/std/src/f16.rs | 4 +- library/std/src/f16/div_euclid.rs | 111 ------- library/std/src/f16/div_euclid/tests.rs | 13 - library/std/src/f16/soft.rs | 213 ++++++++++++++ library/std/src/f16/soft/tests.rs | 39 +++ library/std/src/f16/tests.rs | 11 +- library/std/src/f32.rs | 4 +- library/std/src/f32/div_euclid.rs | 204 ------------- library/std/src/f32/div_euclid/tests.rs | 21 -- library/std/src/f32/soft.rs | 344 ++++++++++++++++++++++ library/std/src/f32/soft/tests.rs | 49 ++++ library/std/src/f32/tests.rs | 11 + library/std/src/f64.rs | 4 +- library/std/src/f64/div_euclid.rs | 204 ------------- library/std/src/f64/div_euclid/tests.rs | 21 -- library/std/src/f64/soft.rs | 344 ++++++++++++++++++++++ library/std/src/f64/soft/tests.rs | 49 ++++ library/std/src/f64/tests.rs | 10 + library/std/src/lib.rs | 1 + library/std/src/u256.rs | 117 ++++++++ library/std/src/u256/tests.rs | 66 +++++ 29 files changed, 1670 insertions(+), 915 deletions(-) delete mode 100644 library/std/src/f128/div_euclid.rs delete mode 100644 library/std/src/f128/div_euclid/tests.rs create mode 100644 library/std/src/f128/soft.rs create mode 100644 library/std/src/f128/soft/tests.rs delete mode 100644 library/std/src/f128/u256.rs delete mode 100644 library/std/src/f128/u256/tests.rs delete mode 100644 library/std/src/f16/div_euclid.rs delete mode 100644 library/std/src/f16/div_euclid/tests.rs create mode 100644 library/std/src/f16/soft.rs create mode 100644 library/std/src/f16/soft/tests.rs delete mode 100644 library/std/src/f32/div_euclid.rs delete mode 100644 library/std/src/f32/div_euclid/tests.rs create mode 100644 library/std/src/f32/soft.rs create mode 100644 library/std/src/f32/soft/tests.rs delete mode 100644 library/std/src/f64/div_euclid.rs delete mode 100644 library/std/src/f64/div_euclid/tests.rs create mode 100644 library/std/src/f64/soft.rs create mode 100644 library/std/src/f64/soft/tests.rs create mode 100644 library/std/src/u256.rs create mode 100644 library/std/src/u256/tests.rs diff --git a/library/std/src/f128.rs b/library/std/src/f128.rs index 56f3df1b4aa95..a206ef8fa47c9 100644 --- a/library/std/src/f128.rs +++ b/library/std/src/f128.rs @@ -4,8 +4,7 @@ //! //! Mathematically significant numbers are provided in the `consts` sub-module. -mod div_euclid; -mod u256; +mod soft; #[cfg(test)] mod tests; @@ -271,7 +270,7 @@ impl f128 { #[unstable(feature = "f128", issue = "116909")] #[must_use = "method returns a new number and does not mutate the original value"] pub fn div_euclid(self, rhs: f128) -> f128 { - div_euclid::div_euclid(self, rhs) + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. diff --git a/library/std/src/f128/div_euclid.rs b/library/std/src/f128/div_euclid.rs deleted file mode 100644 index 38334ac674aaf..0000000000000 --- a/library/std/src/f128/div_euclid.rs +++ /dev/null @@ -1,204 +0,0 @@ -#[cfg(test)] -mod tests; - -use crate::f128::u256::U256; - -/// Software implementation of `f128::div_euclid`. -#[allow(dead_code)] -pub(crate) fn div_euclid(a: f128, b: f128) -> f128 { - if let Some((a_neg, a_exp, a_m)) = normal_form(a) - && let Some((b_neg, b_exp, b_m)) = normal_form(b) - { - let exp = a_exp - b_exp; - match (a_neg, b_neg) { - (false, false) => div_floor(exp, a_m, b_m), - (true, false) => -div_ceil(exp, a_m, b_m), - (false, true) => -div_floor(exp, a_m, b_m), - (true, true) => div_ceil(exp, a_m, b_m), - } - } else { - // `a` or `b` are +-0.0 or infinity or NaN. - // `a / b` is also +-0.0 or infinity or NaN. - // There is no need to round to an integer. - a / b - } -} - -/// Returns `floor((a << exp) / b)`. -/// -/// Requires `2^112 <= a, b < 2^113`. -fn div_floor(exp: i32, a: u128, b: u128) -> f128 { - if exp < 0 { - 0.0 - } else if exp <= 15 { - // aa < (2^113 << 15) = 2^128 - let aa = a << exp; - // q < 2^128 / 2^112 = 2^16 - let q = (aa / b) as u32; - // We have to use `as` because `From for f128` is not yet implemented. - q as f128 - } else if exp <= 127 { - // aa = a << exp - // aa < (2^113 << 127) = 2^240 - let aa = U256::shl_u128(a, exp as u32); - // q < 2^240 / 2^112 = 2^128 - let (q, _) = aa.div_rem(b); - q as f128 - } else { - // aa >= (2^112 << 127) = 2^239 - // aa < (2^113 << 127) = 2^240 - let aa = U256::shl_u128(a, 127); - // e > 0 - // The result is floor((aa << e) / b). - let e = (exp - 127) as u32; - - // aa = q * b + r - // q >= 2^239 / 2^113 = 2^126 - // q < 2^239 / 2^112 = 2^128 - // 0 <= r < b - let (q, r) = aa.div_rem(b); - - // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b) - // 0 <= (r << e) / b < 2^e - // - // There are two cases: - // 1. floor((r << e) / b) = 0 - // 2. 0 < floor((r << e) / b) < 2^e - // - // In case 1: - // The result is q << e. - // - // In case 2: - // The result is (q << e) + non-zero low e bits. - // This rounds the same way as (q | 1) << e because rounding beyond - // the 25 most significant bits of q depends only on whether the low-order - // bits are non-zero. - // - // Case 1 happens when: - // (r << e) / b < 1 - // (r << e) <= b - 1 - // r <= ((b - 1) >> e) - let case_1_bound = if e < 128 { (b - 1) >> e } else { 0 }; - let q_adj = if r <= case_1_bound { - // Case 1. - q - } else { - // Case 2. - q | 1 - }; - q_adj as f128 * pow2(e) - } -} - -/// Returns `ceil((a << exp) / b)`. -/// -/// Requires `2^112 <= a, b < 2^113`. -fn div_ceil(exp: i32, a: u128, b: u128) -> f128 { - if exp < 0 { - 1.0 - } else if exp <= 15 { - // aa < (2^113 << 15) = 2^128 - let aa = a << exp; - // q < 2^128 / 2^112 + 1 = 2^16 + 1 - let q = ((aa - 1) / b) as u32 + 1; - // We have to use `as` because `From for f128` is not yet implemented. - q as f128 - } else if exp <= 127 { - // aa = a << exp - // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127 - let aa = U256::shl_u128(a, exp as u32); - // q <= (2^240 - 2^127) / 2^112 + 1 = 2^128 - 2^15 + 1 - let (q, _) = (aa - U256::ONE).div_rem(b); - (q + 1) as f128 - } else { - // aa >= (2^112 << 127) = 2^239 - // aa <= ((2^113 - 1) << 127) = 2^240 - 2^127 - let aa = U256::shl_u128(a, 127); - // e > 0 - // The result is ceil((aa << e) / b). - let e = (exp - 127) as u32; - - // aa = q * b + r - // q >= 2^239 / 2^112 = 2^126 - // q <= (2^240 - 2^127) / 2^112 = 2^128 - 2^15 - // 0 <= r < b - let (q, r) = aa.div_rem(b); - - // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b) - // 0 <= (r << e) / b < 2^e - // - // There are three cases: - // 1. ceil((r << e) / b) = 0 - // 2. 0 < ceil((r << e) / b) < 2^e - // 3. ceil((r << e) / b) = 2^e - // - // In case 1: - // The result is q << e. - // - // In case 2: - // The result is (q << e) + non-zero low e bits. - // This rounds the same way as (q | 1) << e because rounding beyond - // the 54 most significant bits of q depends only on whether the low-order - // bits are non-zero. - // - // In case 3: - // The result is (q + 1) << e. - // - // Case 1 happens when r = 0. - // Case 3 happens when: - // (r << e) / b > (1 << e) - 1 - // (r << e) > (b << e) - b - // ((b - r) << e) <= b - 1 - // b - r <= (b - 1) >> e - // r >= b - ((b - 1) >> e) - let case_3_bound = b - if e < 128 { (b - 1) >> e } else { 0 }; - let q_adj = if r == 0 { - // Case 1. - q - } else if r < case_3_bound { - // Case 2. - q | 1 - } else { - // Case 3. - q + 1 - }; - q_adj as f128 * pow2(e) - } -} - -/// For finite, non-zero numbers returns (sign, exponent, mantissa). -/// -/// `x = (-1)^sign * 2^exp * mantissa` -/// -/// `2^112 <= mantissa < 2^113` -fn normal_form(x: f128) -> Option<(bool, i32, u128)> { - let bits = x.to_bits(); - let sign = bits >> 127 != 0; - let biased_exponent = (bits >> 112 & 0x7fff) as i32; - let significand = bits & ((1 << 112) - 1); - match biased_exponent { - 0 if significand == 0 => { - // 0.0 - None - } - 0 => { - // Subnormal number: 2^(-16382-112) * significand. - // We want mantissa to have exactly 15 leading zeros. - let shift = significand.leading_zeros() - 15; - Some((sign, -16382 - 112 - shift as i32, significand << shift)) - } - 0x7fff => { - // Infinity or NaN. - None - } - _ => { - // Normal number: 2^(biased_exponent-16383-112) * (2^112 + significand) - Some((sign, biased_exponent - 16383 - 112, 1 << 112 | significand)) - } - } -} - -/// Returns `2^exp`. -fn pow2(exp: u32) -> f128 { - if exp <= 16383 { f128::from_bits(u128::from(exp + 16383) << 112) } else { f128::INFINITY } -} diff --git a/library/std/src/f128/div_euclid/tests.rs b/library/std/src/f128/div_euclid/tests.rs deleted file mode 100644 index a89004bf10ea8..0000000000000 --- a/library/std/src/f128/div_euclid/tests.rs +++ /dev/null @@ -1,23 +0,0 @@ -#![cfg(reliable_f128_math)] - -#[test] -fn test_normal_form() { - use crate::f128::div_euclid::normal_form; - - assert_eq!(normal_form(-1.5f128), Some((true, -112, 3 << 111))); - assert_eq!(normal_form(f128::MIN_POSITIVE), Some((false, -16494, 1 << 112))); - assert_eq!(normal_form(f128::MIN_POSITIVE / 2.0), Some((false, -16495, 1 << 112))); - assert_eq!(normal_form(f128::MAX), Some((false, 16271, (1 << 113) - 1))); - assert_eq!(normal_form(0.0), None); - assert_eq!(normal_form(f128::INFINITY), None); - assert_eq!(normal_form(f128::NAN), None); -} - -#[test] -fn test_pow2() { - use crate::f128::div_euclid::pow2; - - assert_eq!(pow2(0), 1.0f128); - assert_eq!(pow2(4), 16.0f128); - assert_eq!(pow2(16384), f128::INFINITY); -} diff --git a/library/std/src/f128/soft.rs b/library/std/src/f128/soft.rs new file mode 100644 index 0000000000000..30b6e82e18086 --- /dev/null +++ b/library/std/src/f128/soft.rs @@ -0,0 +1,350 @@ +//! Software floating point functions for `f128`. + +#[cfg(test)] +mod tests; + +use crate::u256::U256; + +/// The floating point type. +type FP = f128; + +/// Type of the exponent. +type Exponent = i32; + +/// Type of `FP::to_bits`. +type Bits = u128; + +/// Half as many bits as `Bits`. +type HalfBits = u64; + +/// Twice as many bits as `Bits`. +type DoubleBits = U256; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Shift right that works even if `shift >= Bits::BITS`. +/// +/// Requires `shift >= 0`. +fn safe_shr(a: Bits, shift: Exponent) -> Bits { + if shift < Bits::BITS as Exponent { a >> shift } else { 0 } +} + +/// Returns `2^exp`. +fn pow2(exp: Exponent) -> FP { + let biased_exponent = exp + EXPONENT_BIAS; + if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) { + // Round to 0. + 0.0 + } else if biased_exponent <= 0 { + // Subnormal. + FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1)) + } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 { + // Normal. + FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS) + } else { + // Round to infinity. + FP::INFINITY + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + // We have to use `as` because `From for f128` is not yet implemented. + q as f128 + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp as u32; + let bb = DoubleBits::from(b.mantissa); + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + let q: Bits = (aa / bb).wrap_u128(); + q as FP + } else { + // exp >= Bits::BITS + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + + // e > 0 + // The result is `floor((aa << e) / bb)` + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + // 0 <= r < b + let (q, r) = aa.div_rem(bb); + let q: Bits = q.wrap_u128(); + let r: Bits = r.wrap_u128(); + + // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are two cases: + // 1. floor((r << e) / bb) = 0 + // 2. 0 < floor((r << e) / bb) < 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // Case 1 happens when: + // (r << e) / bb < 1 + // (r << e) <= bb - 1 + // r <= ((bb - 1) >> e) + let case_1_bound = safe_shr(b.mantissa - 1, e); + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + }; + q_adj as FP * pow2(e) + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + // We have to use `as` because `From for f128` is not yet implemented. + q as f128 + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp as u32; + let bb = DoubleBits::from(b.mantissa); + // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1 + // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX + let q = ((aa - U256::ONE) / bb).wrap_u128() + 1; + q as FP + } else { + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + // e > 0 + // The result is ceil((aa << e) / b). + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q < Bits::MAX` as in the previous case. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + // 0 <= r < b + let (q, r) = aa.div_rem(bb); + let q: Bits = q.wrap_u128(); + let r: Bits = r.wrap_u128(); + + // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are three cases: + // 1. ceil((r << e) / bb) = 0 + // 2. 0 < ceil((r << e) / bb) < 2^e + // 3. ceil((r << e) / bb) = 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / bb > (1 << e) - 1 + // (r << e) > (bb << e) - b + // ((bb - r) << e) <= bb - 1 + // bb - r <= (bb - 1) >> e + // r >= bb - ((bb - 1) >> e) + let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e); + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + } else { + // Case 3. + // No overflow because `q < Bits::MAX`. + q + 1 + }; + q_adj as FP * pow2(e) + } +} diff --git a/library/std/src/f128/soft/tests.rs b/library/std/src/f128/soft/tests.rs new file mode 100644 index 0000000000000..2156e3fd8448a --- /dev/null +++ b/library/std/src/f128/soft/tests.rs @@ -0,0 +1,49 @@ +use crate::f128::soft::{Positive, PositiveFinite, Representation, Sign, pow2}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f128), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -112, mantissa: 3 << 111 }), + }); + + assert_eq!(Representation::new(f128::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -16494, mantissa: 1 << 112 }), + }); + + assert_eq!(Representation::new(f128::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -16495, mantissa: 1 << 112 }), + }); + + assert_eq!(Representation::new(f128::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 16271, mantissa: (1 << 113) - 1 }), + }); + + assert_eq!(Representation::new(-0.0f128), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f128::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f128::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} + +#[test] +fn test_pow2() { + assert_eq!(pow2(2), 4.0); + assert_eq!(pow2(-1), 0.5); + assert_eq!(pow2(-16495), 0.0); + assert_eq!(pow2(-16382), f128::MIN_POSITIVE); + assert_eq!(pow2(-16383), f128::MIN_POSITIVE / 2.0); + assert_eq!(pow2(16384), f128::INFINITY); +} diff --git a/library/std/src/f128/tests.rs b/library/std/src/f128/tests.rs index 299d74d19b005..667e4dc68bc30 100644 --- a/library/std/src/f128/tests.rs +++ b/library/std/src/f128/tests.rs @@ -460,7 +460,6 @@ fn test_mul_add() { } #[test] -#[cfg(reliable_f128_math)] fn test_div_euclid() { use core::cmp::Ordering; @@ -469,7 +468,6 @@ fn test_div_euclid() { let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS); // Infinity and NaN. - assert!(nan.div_euclid(5.0).is_nan()); assert!(inf.div_euclid(inf).is_nan()); assert!(0.0f128.div_euclid(0.0).is_nan()); @@ -479,6 +477,16 @@ fn test_div_euclid() { assert_eq!(5.0f128.div_euclid(0.0), inf); assert_eq!((-5.0f128).div_euclid(0.0), -inf); + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f128.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f128.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f128).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f128.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f128.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f128).div_euclid(-inf), 1.0); + // 0 / x assert_eq!(Ordering::Equal, 0.0f128.div_euclid(10.0).total_cmp(&0.0)); assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(10.0).total_cmp(&-0.0)); diff --git a/library/std/src/f128/u256.rs b/library/std/src/f128/u256.rs deleted file mode 100644 index 5c812437acbbe..0000000000000 --- a/library/std/src/f128/u256.rs +++ /dev/null @@ -1,63 +0,0 @@ -//! Very limited 256-bit arithmetic. - -#[cfg(test)] -mod tests; - -use core::ops::{Sub, SubAssign}; - -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] -pub(crate) struct U256 { - high: u128, - low: u128, -} - -impl U256 { - pub(crate) const ONE: Self = U256 { high: 0, low: 1 }; - - /// `x << shift`. - /// - /// Requires `0 <= shift < 128`. - pub(crate) fn shl_u128(x: u128, shift: u32) -> Self { - Self { low: x << shift, high: if shift != 0 { x >> (128 - shift) } else { 0 } } - } - - /// The number of leading 0 bits. - pub(crate) fn leading_zeros(self) -> u32 { - if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() } - } - - /// Returns (quotient, remainder). - /// - /// Requires `self < (rhs << 128)` - pub(crate) fn div_rem(mut self, rhs: u128) -> (u128, u128) { - let rhs_len = 128 - rhs.leading_zeros(); - let self_len = 256 - self.leading_zeros(); - // self < (rhs << shift_limit) - let shift_limit = (self_len + 1).saturating_sub(rhs_len).min(128); - let mut q = 0; - for shift in (0..shift_limit).rev() { - let rhs_shifted = Self::shl_u128(rhs, shift); - if self >= rhs_shifted { - q |= 1 << shift; - self -= rhs_shifted; - } - } - (q, self.low) - } -} - -impl Sub for U256 { - type Output = Self; - - fn sub(self, rhs: U256) -> Self { - let (low, borrow) = self.low.overflowing_sub(rhs.low); - let (high, _) = self.high.borrowing_sub(rhs.high, borrow); - Self { low, high } - } -} - -impl SubAssign for U256 { - fn sub_assign(&mut self, rhs: U256) { - *self = *self - rhs; - } -} diff --git a/library/std/src/f128/u256/tests.rs b/library/std/src/f128/u256/tests.rs deleted file mode 100644 index 15ed4981eb9b4..0000000000000 --- a/library/std/src/f128/u256/tests.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::f128::u256::U256; - -#[test] -fn test_shl_u128() { - let a = 0x0123456789abcdef0123456789abcdef; - - let b = U256::shl_u128(a, 0); - assert_eq!(b.high, 0); - assert_eq!(b.low, a); - - let b = U256::shl_u128(a, 40); - assert_eq!(b.high, 0x0123456789); - assert_eq!(b.low, 0xabcdef0123456789abcdef0000000000); -} - -#[test] -fn test_leading_zeros() { - assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256); - assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116); - assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252); -} - -#[test] -fn test_sub() { - let a = U256 { high: 7, low: 10 }; - let b = U256 { high: 3, low: 2 }; - let c = U256 { high: 3, low: 15 }; - assert_eq!(a - b, U256 { high: 4, low: 8 }); - assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 }); -} - -#[test] -fn test_div_rem() { - let a = U256 { high: 0, low: 7 }; - let b = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef }; - let c = 0x9876543210abc; - assert_eq!(a.div_rem(c), (0, 7)); - assert_eq!(b.div_rem(c), (0x1207a42fc2c725f9cf302ff31d, 0x7c11e593922a3)); -} diff --git a/library/std/src/f16.rs b/library/std/src/f16.rs index 062de8d6ef622..00095b740ed80 100644 --- a/library/std/src/f16.rs +++ b/library/std/src/f16.rs @@ -4,7 +4,7 @@ //! //! Mathematically significant numbers are provided in the `consts` sub-module. -mod div_euclid; +mod soft; #[cfg(test)] mod tests; @@ -270,7 +270,7 @@ impl f16 { #[unstable(feature = "f16", issue = "116909")] #[must_use = "method returns a new number and does not mutate the original value"] pub fn div_euclid(self, rhs: f16) -> f16 { - div_euclid::div_euclid(self, rhs) + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. diff --git a/library/std/src/f16/div_euclid.rs b/library/std/src/f16/div_euclid.rs deleted file mode 100644 index df770c3af1c25..0000000000000 --- a/library/std/src/f16/div_euclid.rs +++ /dev/null @@ -1,111 +0,0 @@ -#[cfg(test)] -mod tests; - -/// Software implementation of `f16::div_euclid`. -#[allow(dead_code)] // f16::div_euclid is cfg(not(test)) -pub(crate) fn div_euclid(a: f16, b: f16) -> f16 { - if let Some((a_neg, a_exp, a_m)) = normal_form(a) - && let Some((b_neg, b_exp, b_m)) = normal_form(b) - { - let exp = a_exp - b_exp; - match (a_neg, b_neg) { - (false, false) => div_floor(exp, a_m, b_m), - (true, false) => -div_ceil(exp, a_m, b_m), - (false, true) => -div_floor(exp, a_m, b_m), - (true, true) => div_ceil(exp, a_m, b_m), - } - } else { - // `a` or `b` are +-0.0 or infinity or NaN. - // `a / b` is also +-0.0 or infinity or NaN. - // There is no need to round to an integer. - a / b - } -} - -/// Returns `floor((a << exp) / b)`. -/// -/// Requires `2^10 <= a, b < 2^11`. -fn div_floor(exp: i16, a: u16, b: u16) -> f16 { - if exp < 0 { - 0.0 - } else if exp <= 5 { - // aa < (2^11 << 5) = 2^16 - let aa = a << exp; - // q < 2^16 / 2^10 = 2^6 - let q = (aa / b) as u8; - // We have to use `as` because `From for f16` is not yet implemented. - q as f16 - } else if exp <= 16 { - // aa < (2^11 << 16) = 2^27 - let aa = u32::from(a) << exp; - let bb = u32::from(b); - // q < 2^27 / 2^10 = 2^17 - let q = aa / bb; - q as f16 - } else { - // exp >= 17 - // result >= (2^10 << 17) / 2^11 = 2^16 - // Exponent 16 is too large to represent in f16. - f16::INFINITY - } -} - -/// Returns `ceil((a << exp) / b)`. -/// -/// Requires `2^10 <= a, b < 2^11`. -fn div_ceil(exp: i16, a: u16, b: u16) -> f16 { - if exp < 0 { - 1.0 - } else if exp <= 5 { - // aa < (2^11 << 5) = 2^16 - let aa = a << exp; - // q < 2^16 / 2^10 + 1 = 2^6 + 1 - let q = ((aa - 1) / b) as u8 + 1; - // We have to use `as` because `From for f16` is not yet implemented. - q as f16 - } else if exp <= 16 { - // aa <= ((2^11 - 1) << 16) = 2^27 - 2^16 - let aa = u32::from(a) << exp; - let bb = u32::from(b); - // q <= (2^27 - 2^16) / 2^10 + 1 = 2^17 - 2^6 + 1 - let q = (aa - 1) / bb + 1; - q as f16 - } else { - // exp >= 17 - // result >= (2^10 << 17) / 2^11 = 2^16 - // Exponent 16 is too large to represent in f16. - f16::INFINITY - } -} - -/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`. -/// -/// `x = (-1)^sign * 2^exp * mantissa` -/// -/// `2^10 <= mantissa < 2^11` -fn normal_form(x: f16) -> Option<(bool, i16, u16)> { - let bits = x.to_bits(); - let sign = bits >> 15 != 0; - let biased_exponent = (bits >> 10 & 0x1f) as i16; - let significand = bits & 0x3ff; - match biased_exponent { - 0 if significand == 0 => { - // 0.0 - None - } - 0 => { - // Subnormal number: 2^(-14-10) * significand. - // We want mantissa to have exactly 5 leading zeros. - let shift = significand.leading_zeros() - 5; - Some((sign, -14 - 10 - shift as i16, significand << shift)) - } - 0x1f => { - // Infinity or NaN. - None - } - _ => { - // Normal number: 2^(biased_exponent-15-10) * (2^10 + significand) - Some((sign, biased_exponent - 15 - 10, 1 << 10 | significand)) - } - } -} diff --git a/library/std/src/f16/div_euclid/tests.rs b/library/std/src/f16/div_euclid/tests.rs deleted file mode 100644 index e428f16c98d66..0000000000000 --- a/library/std/src/f16/div_euclid/tests.rs +++ /dev/null @@ -1,13 +0,0 @@ -#[cfg(reliable_f16_math)] -#[test] -fn test_normal_form() { - use crate::f16::div_euclid::normal_form; - - assert_eq!(normal_form(-1.5f16), Some((true, -10, 0x600))); - assert_eq!(normal_form(f16::MIN_POSITIVE), Some((false, -24, 0x400))); - assert_eq!(normal_form(f16::MIN_POSITIVE / 2.0), Some((false, -25, 0x400))); - assert_eq!(normal_form(f16::MAX), Some((false, 5, 0x7ff))); - assert_eq!(normal_form(0.0), None); - assert_eq!(normal_form(f16::INFINITY), None); - assert_eq!(normal_form(f16::NAN), None); -} diff --git a/library/std/src/f16/soft.rs b/library/std/src/f16/soft.rs new file mode 100644 index 0000000000000..2ebd1bed1ef9b --- /dev/null +++ b/library/std/src/f16/soft.rs @@ -0,0 +1,213 @@ +//! Software floating point functions for `f16`. + +#[cfg(test)] +mod tests; + +/// The floating point type. +type FP = f16; + +/// Type of the exponent. +type Exponent = i16; + +/// Type of `FP::to_bits`. +type Bits = u16; + +/// Half as many bits as `Bits`. +type HalfBits = u8; + +/// Twice as many bits as `Bits`. +type DoubleBits = u32; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP as Exponent; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + // We have to use `as` because `From for f16` is not yet implemented. + q as f16 + } else if exp <= FP::MAX_EXP as Exponent { + // Verify that `aa` fits in `DoubleBits`. + const _: () = assert!(FP::MAX_EXP as u32 <= Bits::BITS); + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + let q = aa / bb; + q as FP + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + // We have to use `as` because `From for f16` is not yet implemented. + q as f16 + } else if exp <= FP::MAX_EXP as Exponent { + // Verify that `aa` fits in `DoubleBits`. + const _: () = assert!(FP::MAX_EXP as u32 <= Bits::BITS); + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + let q = (aa - 1) / bb + 1; + q as FP + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} diff --git a/library/std/src/f16/soft/tests.rs b/library/std/src/f16/soft/tests.rs new file mode 100644 index 0000000000000..ce277a20771b5 --- /dev/null +++ b/library/std/src/f16/soft/tests.rs @@ -0,0 +1,39 @@ +use crate::f16::soft::{Positive, PositiveFinite, Representation, Sign}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f16), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -10, mantissa: 0x600 }), + }); + + assert_eq!(Representation::new(f16::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -24, mantissa: 0x400 }), + }); + + assert_eq!(Representation::new(f16::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -25, mantissa: 0x400 }), + }); + + assert_eq!(Representation::new(f16::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 5, mantissa: 0x7ff }), + }); + + assert_eq!(Representation::new(-0.0), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f16::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f16::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} diff --git a/library/std/src/f16/tests.rs b/library/std/src/f16/tests.rs index 4fc5f5d3dec09..141f07e54a2cf 100644 --- a/library/std/src/f16/tests.rs +++ b/library/std/src/f16/tests.rs @@ -443,7 +443,6 @@ fn test_mul_add() { } #[test] -#[cfg(reliable_f16_math)] fn test_div_euclid() { use core::cmp::Ordering; @@ -461,6 +460,16 @@ fn test_div_euclid() { assert_eq!(5.0f16.div_euclid(0.0), inf); assert_eq!((-5.0f16).div_euclid(0.0), -inf); + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f16.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f16.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f16).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f16.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f16.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f16).div_euclid(-inf), 1.0); + // 0 / x assert_eq!(Ordering::Equal, 0.0f16.div_euclid(10.0).total_cmp(&0.0)); assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(10.0).total_cmp(&-0.0)); diff --git a/library/std/src/f32.rs b/library/std/src/f32.rs index 516f6ef0fac3b..6207177768f90 100644 --- a/library/std/src/f32.rs +++ b/library/std/src/f32.rs @@ -12,7 +12,7 @@ #![stable(feature = "rust1", since = "1.0.0")] #![allow(missing_docs)] -mod div_euclid; +mod soft; #[cfg(test)] mod tests; @@ -250,7 +250,7 @@ impl f32 { #[inline] #[stable(feature = "euclidean_division", since = "1.38.0")] pub fn div_euclid(self, rhs: f32) -> f32 { - div_euclid::div_euclid(self, rhs) + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. diff --git a/library/std/src/f32/div_euclid.rs b/library/std/src/f32/div_euclid.rs deleted file mode 100644 index 6040b82d26cb9..0000000000000 --- a/library/std/src/f32/div_euclid.rs +++ /dev/null @@ -1,204 +0,0 @@ -#[cfg(test)] -mod tests; - -/// Software implementation of `f32::div_euclid`. -#[allow(dead_code)] -pub(crate) fn div_euclid(a: f32, b: f32) -> f32 { - if let Some((a_neg, a_exp, a_m)) = normal_form(a) - && let Some((b_neg, b_exp, b_m)) = normal_form(b) - { - let exp = a_exp - b_exp; - match (a_neg, b_neg) { - (false, false) => div_floor(exp, a_m, b_m), - (true, false) => -div_ceil(exp, a_m, b_m), - (false, true) => -div_floor(exp, a_m, b_m), - (true, true) => div_ceil(exp, a_m, b_m), - } - } else { - // `a` or `b` are +-0.0 or infinity or NaN. - // `a / b` is also +-0.0 or infinity or NaN. - // There is no need to round to an integer. - a / b - } -} - -/// Returns `floor((a << exp) / b)`. -/// -/// Requires `2^23 <= a, b < 2^24` -fn div_floor(exp: i32, a: u32, b: u32) -> f32 { - if exp < 0 { - 0.0 - } else if exp <= 8 { - // aa < (2^24 << 8) = 2^32 - let aa = a << exp; - // q < 2^32 / 2^23 = 2^9 - let q = (aa / b) as u16; - q.into() - } else if exp <= 31 { - // aa < (2^24 << 31) = 2^55 - let aa = u64::from(a) << exp; - let bb = u64::from(b); - // q < 2^55 / 2^23 = 2^32 - let q = (aa / bb) as u32; - q as f32 - } else { - // aa >= (2^23 << 31) = 2^54 - // aa < (2^24 << 31) = 2^55 - let aa = u64::from(a) << 31; - let bb = u64::from(b); - // e > 0 - // The result is floor((aa << e) / b). - let e = (exp - 31) as u32; - - // aa = q * b + r - // q >= 2^54 / 2^24 = 2^30 - // q < 2^55 / 2^23 = 2^32 - let q = (aa / bb) as u32; - // 0 <= r < b - let r = (aa % bb) as u32; - - // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b) - // 0 <= (r << e) / b < 2^e - // - // There are two cases: - // 1. floor((r << e) / b) = 0 - // 2. 0 < floor((r << e) / b) < 2^e - // - // In case 1: - // The result is q << e. - // - // In case 2: - // The result is (q << e) + non-zero low e bits. - // This rounds the same way as (q | 1) << e because rounding beyond - // the 25 most significant bits of q depends only on whether the low-order - // bits are non-zero. - // - // Case 1 happens when: - // (r << e) / b < 1 - // (r << e) <= b - 1 - // r <= ((b - 1) >> e) - let case_1_bound = if e < 32 { (b - 1) >> e } else { 0 }; - let q_adj = if r <= case_1_bound { - // Case 1. - q - } else { - // Case 2. - q | 1 - }; - q_adj as f32 * pow2(e) - } -} - -/// Returns ceil((a << exp) / b). -/// -/// Requires `2^23 <= a, b < 2^24` -fn div_ceil(exp: i32, a: u32, b: u32) -> f32 { - if exp < 0 { - 1.0 - } else if exp <= 8 { - // aa < (2^24 << 8) = 2^32 - let aa = a << exp; - // q < 2^32 / 2^23 + 1 = 2^9 + 1 - let q = ((aa - 1) / b) as u16 + 1; - q.into() - } else if exp <= 31 { - // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31 - let aa = u64::from(a) << exp; - let bb = u64::from(b); - // q <= (2^55 - 2^31) / 2^23 + 1 = 2^32 - 2^8 + 1 - let q = ((aa - 1) / bb) as u32 + 1; - q as f32 - } else { - // aa >= (2^23 << 31) = 2^54 - // aa <= ((2^24 - 1) << 31) = 2^55 - 2^31 - let aa = u64::from(a) << 31; - let bb = u64::from(b); - // e > 0 - // The result is ceil((aa << e) / b). - let e = (exp - 31) as u32; - - // aa = q * b + r - // q >= 2^54 / 2^24 = 2^30 - // q <= (2^55 - 2^31) / 2^23 = 2^32 - 2^8 - let q = (aa / bb) as u32; - // 0 <= r < b - let r = (aa % bb) as u32; - - // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b) - // 0 <= (r << e) / b < 2^e - // - // There are three cases: - // 1. ceil((r << e) / b) = 0 - // 2. 0 < ceil((r << e) / b) < 2^e - // 3. ceil((r << e) / b) = 2^e - // - // In case 1: - // The result is q << e. - // - // In case 2: - // The result is (q << e) + non-zero low e bits. - // This rounds the same way as (q | 1) << e because rounding beyond - // the 25 most significant bits of q depends only on whether the low-order - // bits are non-zero. - // - // In case 3: - // The result is (q + 1) << e. - // - // Case 1 happens when r = 0. - // Case 3 happens when: - // (r << e) / b > (1 << e) - 1 - // (r << e) > (b << e) - b - // ((b - r) << e) <= b - 1 - // b - r <= (b - 1) >> e - // r >= b - ((b - 1) >> e) - let case_3_bound = b - if e < 32 { (b - 1) >> e } else { 0 }; - let q_adj = if r == 0 { - // Case 1. - q - } else if r < case_3_bound { - // Case 2. - q | 1 - } else { - // Case 3. - q + 1 - }; - q_adj as f32 * pow2(e) - } -} - -/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`. -/// -/// `x = (-1)^sign * 2^exp * mantissa` -/// -/// `2^23 <= mantissa < 2^24` -fn normal_form(x: f32) -> Option<(bool, i32, u32)> { - let bits = x.to_bits(); - let sign = bits >> 31 != 0; - let biased_exponent = (bits >> 23 & 0xff) as i32; - let significand = bits & 0x7fffff; - match biased_exponent { - 0 if significand == 0 => { - // 0.0 - None - } - 0 => { - // Subnormal number: 2^(-126-23) * significand. - // We want mantissa to have exactly 8 leading zeros. - let shift = significand.leading_zeros() - 8; - Some((sign, -126 - 23 - shift as i32, significand << shift)) - } - 0xff => { - // Infinity or NaN. - None - } - _ => { - // Normal number: 2^(biased_exponent-127-23) * (2^23 + significand) - Some((sign, biased_exponent - 127 - 23, 1 << 23 | significand)) - } - } -} - -/// Returns `2^exp`. -fn pow2(exp: u32) -> f32 { - if exp <= 127 { f32::from_bits((exp + 127) << 23) } else { f32::INFINITY } -} diff --git a/library/std/src/f32/div_euclid/tests.rs b/library/std/src/f32/div_euclid/tests.rs deleted file mode 100644 index e1c139f7e16ed..0000000000000 --- a/library/std/src/f32/div_euclid/tests.rs +++ /dev/null @@ -1,21 +0,0 @@ -#[test] -fn test_normal_form() { - use crate::f32::div_euclid::normal_form; - - assert_eq!(normal_form(-1.5f32), Some((true, -23, 0xc00000))); - assert_eq!(normal_form(f32::MIN_POSITIVE), Some((false, -149, 0x800000))); - assert_eq!(normal_form(f32::MIN_POSITIVE / 2.0), Some((false, -150, 0x800000))); - assert_eq!(normal_form(f32::MAX), Some((false, 104, 0xffffff))); - assert_eq!(normal_form(0.0), None); - assert_eq!(normal_form(f32::INFINITY), None); - assert_eq!(normal_form(f32::NAN), None); -} - -#[test] -fn test_pow2() { - use crate::f32::div_euclid::pow2; - - assert_eq!(pow2(0), 1.0f32); - assert_eq!(pow2(4), 16.0f32); - assert_eq!(pow2(128), f32::INFINITY); -} diff --git a/library/std/src/f32/soft.rs b/library/std/src/f32/soft.rs new file mode 100644 index 0000000000000..9a280893eccf6 --- /dev/null +++ b/library/std/src/f32/soft.rs @@ -0,0 +1,344 @@ +//! Software floating point functions for `f32`. + +#[cfg(test)] +mod tests; + +/// The floating point type. +type FP = f32; + +/// Type of the exponent. +type Exponent = i32; + +/// Type of `FP::to_bits`. +type Bits = u32; + +/// Half as many bits as `Bits`. +type HalfBits = u16; + +/// Twice as many bits as `Bits`. +type DoubleBits = u64; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Shift right that works even if `shift >= Bits::BITS`. +/// +/// Requires `shift >= 0`. +fn safe_shr(a: Bits, shift: Exponent) -> Bits { + if shift < Bits::BITS as Exponent { a >> shift } else { 0 } +} + +/// Returns `2^exp`. +fn pow2(exp: Exponent) -> FP { + let biased_exponent = exp + EXPONENT_BIAS; + if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) { + // Round to 0. + 0.0 + } else if biased_exponent <= 0 { + // Subnormal. + FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1)) + } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 { + // Normal. + FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS) + } else { + // Round to infinity. + FP::INFINITY + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + let q = (aa / bb) as Bits; + q as FP + } else { + // exp >= Bits::BITS + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + + // e > 0 + // The result is `floor((aa << e) / bb)` + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are two cases: + // 1. floor((r << e) / bb) = 0 + // 2. 0 < floor((r << e) / bb) < 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // Case 1 happens when: + // (r << e) / bb < 1 + // (r << e) <= bb - 1 + // r <= ((bb - 1) >> e) + let case_1_bound = safe_shr(b.mantissa - 1, e); + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + }; + q_adj as FP * pow2(e) + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1 + // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX + let q = ((aa - 1) / bb) as Bits + 1; + q as FP + } else { + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + // e > 0 + // The result is ceil((aa << e) / b). + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q < Bits::MAX` as in the previous case. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are three cases: + // 1. ceil((r << e) / bb) = 0 + // 2. 0 < ceil((r << e) / bb) < 2^e + // 3. ceil((r << e) / bb) = 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / bb > (1 << e) - 1 + // (r << e) > (bb << e) - b + // ((bb - r) << e) <= bb - 1 + // bb - r <= (bb - 1) >> e + // r >= bb - ((bb - 1) >> e) + let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e); + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + } else { + // Case 3. + // No overflow because `q < Bits::MAX`. + q + 1 + }; + q_adj as FP * pow2(e) + } +} diff --git a/library/std/src/f32/soft/tests.rs b/library/std/src/f32/soft/tests.rs new file mode 100644 index 0000000000000..e7b7affab9ab5 --- /dev/null +++ b/library/std/src/f32/soft/tests.rs @@ -0,0 +1,49 @@ +use crate::f32::soft::{Positive, PositiveFinite, Representation, Sign, pow2}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f32), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -23, mantissa: 0xc00000 }), + }); + + assert_eq!(Representation::new(f32::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -149, mantissa: 0x800000 }), + }); + + assert_eq!(Representation::new(f32::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -150, mantissa: 0x800000 }), + }); + + assert_eq!(Representation::new(f32::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 104, mantissa: 0xffffff }), + }); + + assert_eq!(Representation::new(-0.0), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f32::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f32::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} + +#[test] +fn test_pow2() { + assert_eq!(pow2(2), 4.0); + assert_eq!(pow2(-1), 0.5); + assert_eq!(pow2(-150), 0.0); + assert_eq!(pow2(-126), f32::MIN_POSITIVE); + assert_eq!(pow2(-127), f32::MIN_POSITIVE / 2.0); + assert_eq!(pow2(128), f32::INFINITY); +} diff --git a/library/std/src/f32/tests.rs b/library/std/src/f32/tests.rs index 895719432000e..1bdc80a4f37b8 100644 --- a/library/std/src/f32/tests.rs +++ b/library/std/src/f32/tests.rs @@ -441,6 +441,16 @@ fn test_div_euclid() { assert_eq!(5.0f32.div_euclid(0.0), inf); assert_eq!((-5.0f32).div_euclid(0.0), -inf); + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f32.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f32.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f32).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f32.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f32.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f32).div_euclid(-inf), 1.0); + // 0 / x assert_eq!(Ordering::Equal, 0.0f32.div_euclid(10.0).total_cmp(&0.0)); assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(10.0).total_cmp(&-0.0)); @@ -448,6 +458,7 @@ fn test_div_euclid() { // small / large assert_eq!(Ordering::Equal, 1.0f32.div_euclid(10.0).total_cmp(&0.0)); assert_eq!((-1.0f32).div_euclid(10.0), -1.0); + println!("{}", 1.0f32.div_euclid(-10.0)); assert_eq!(Ordering::Equal, 1.0f32.div_euclid(-10.0).total_cmp(&-0.0)); assert_eq!((-1.0f32).div_euclid(-10.0), 1.0); diff --git a/library/std/src/f64.rs b/library/std/src/f64.rs index fb17857ec6d94..751c77faff1a5 100644 --- a/library/std/src/f64.rs +++ b/library/std/src/f64.rs @@ -12,7 +12,7 @@ #![stable(feature = "rust1", since = "1.0.0")] #![allow(missing_docs)] -mod div_euclid; +mod soft; #[cfg(test)] mod tests; @@ -250,7 +250,7 @@ impl f64 { #[inline] #[stable(feature = "euclidean_division", since = "1.38.0")] pub fn div_euclid(self, rhs: f64) -> f64 { - div_euclid::div_euclid(self, rhs) + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. diff --git a/library/std/src/f64/div_euclid.rs b/library/std/src/f64/div_euclid.rs deleted file mode 100644 index f4346dd43afff..0000000000000 --- a/library/std/src/f64/div_euclid.rs +++ /dev/null @@ -1,204 +0,0 @@ -#[cfg(test)] -mod tests; - -/// Software implementation of `f64::div_euclid`. -#[allow(dead_code)] -pub(crate) fn div_euclid(a: f64, b: f64) -> f64 { - if let Some((a_neg, a_exp, a_m)) = normal_form(a) - && let Some((b_neg, b_exp, b_m)) = normal_form(b) - { - let exp = a_exp - b_exp; - match (a_neg, b_neg) { - (false, false) => div_floor(exp, a_m, b_m), - (true, false) => -div_ceil(exp, a_m, b_m), - (false, true) => -div_floor(exp, a_m, b_m), - (true, true) => div_ceil(exp, a_m, b_m), - } - } else { - // `a` or `b` are +-0.0 or infinity or NaN. - // `a / b` is also +-0.0 or infinity or NaN. - // There is no need to round to an integer. - a / b - } -} - -/// Returns `floor((a << exp) / b)`. -/// -/// Requires `2^52 <= a, b < 2^53`. -fn div_floor(exp: i32, a: u64, b: u64) -> f64 { - if exp < 0 { - 0.0 - } else if exp <= 11 { - // aa < (2^53 << 11) = 2^64 - let aa = a << exp; - // q < 2^64 / 2^52 = 2^12 - let q = (aa / b) as u32; - q.into() - } else if exp <= 63 { - // aa < (2^53 << 63) = 2^116 - let aa = u128::from(a) << exp; - let bb = u128::from(b); - // q < 2^116 / 2^52 = 2^64 - let q = (aa / bb) as u64; - q as f64 - } else { - // aa >= (2^52 << 63) = 2^115 - // aa < (2^53 << 63) = 2^116 - let aa = u128::from(a) << 63; - let bb = u128::from(b); - // e > 0 - // The result is floor((aa << e) / b). - let e = (exp - 63) as u32; - - // aa = q * b + r - // q >= 2^115 / 2^53 = 2^62 - // q < 2^116 / 2^52 = 2^64 - let q = (aa / bb) as u64; - // 0 <= r < b - let r = (aa % bb) as u64; - - // result = floor((aa << e) / b) = (q << e) + floor((r << e) / b) - // 0 <= (r << e) / b < 2^e - // - // There are two cases: - // 1. floor((r << e) / b) = 0 - // 2. 0 < floor((r << e) / b) < 2^e - // - // In case 1: - // The result is q << e. - // - // In case 2: - // The result is (q << e) + non-zero low e bits. - // This rounds the same way as (q | 1) << e because rounding beyond - // the 25 most significant bits of q depends only on whether the low-order - // bits are non-zero. - // - // Case 1 happens when: - // (r << e) / b < 1 - // (r << e) <= b - 1 - // r <= ((b - 1) >> e) - let case_1_bound = if e < 64 { (b - 1) >> e } else { 0 }; - let q_adj = if r <= case_1_bound { - // Case 1. - q - } else { - // Case 2. - q | 1 - }; - q_adj as f64 * pow2(e) - } -} - -/// Returns `ceil((a << exp) / b)`. -/// -/// Requires `2^52 <= a, b < 2^53`. -fn div_ceil(exp: i32, a: u64, b: u64) -> f64 { - if exp < 0 { - 1.0 - } else if exp <= 11 { - // aa < (2^53 << 11) = 2^64 - let aa = a << exp; - // q < 2^64 / 2^52 + 1 = 2^12 + 1 - let q = ((aa - 1) / b) as u32 + 1; - q.into() - } else if exp <= 63 { - // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63 - let aa = u128::from(a) << exp; - let bb = u128::from(b); - // q <= (2^116 - 2^63) / 2^52 + 1 = 2^64 - 2^11 + 1 - let q = ((aa - 1) / bb) as u64 + 1; - q as f64 - } else { - // aa >= (2^52 << 63) = 2^115 - // aa <= ((2^53 - 1) << 63) = 2^116 - 2^63 - let aa = u128::from(a) << 63; - let bb = u128::from(b); - // e > 0 - // The result is ceil((aa << e) / b). - let e = (exp - 63) as u32; - - // aa = q * b + r - // q >= 2^115 / 2^53 = 2^62 - // q <= (2^116 - 2^63) / 2^52 = 2^64 - 2^11 - let q = (aa / bb) as u64; - // 0 <= r < b - let r = (aa % bb) as u64; - - // result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b) - // 0 <= (r << e) / b < 2^e - // - // There are three cases: - // 1. ceil((r << e) / b) = 0 - // 2. 0 < ceil((r << e) / b) < 2^e - // 3. ceil((r << e) / b) = 2^e - // - // In case 1: - // The result is q << e. - // - // In case 2: - // The result is (q << e) + non-zero low e bits. - // This rounds the same way as (q | 1) << e because rounding beyond - // the 54 most significant bits of q depends only on whether the low-order - // bits are non-zero. - // - // In case 3: - // The result is (q + 1) << e. - // - // Case 1 happens when r = 0. - // Case 3 happens when: - // (r << e) / b > (1 << e) - 1 - // (r << e) > (b << e) - b - // ((b - r) << e) <= b - 1 - // b - r <= (b - 1) >> e - // r >= b - ((b - 1) >> e) - let case_3_bound = b - if e < 64 { (b - 1) >> e } else { 0 }; - let q_adj = if r == 0 { - // Case 1. - q - } else if r < case_3_bound { - // Case 2. - q | 1 - } else { - // Case 3. - q + 1 - }; - q_adj as f64 * pow2(e) - } -} - -/// For finite, non-zero numbers returns `(sign, exponent, mantissa)`. -/// -/// `x = (-1)^sign * 2^exp * mantissa` -/// -/// `2^52 <= mantissa < 2^53` -fn normal_form(x: f64) -> Option<(bool, i32, u64)> { - let bits = x.to_bits(); - let sign = bits >> 63 != 0; - let biased_exponent = (bits >> 52 & 0x7ff) as i32; - let significand = bits & ((1 << 52) - 1); - match biased_exponent { - 0 if significand == 0 => { - // 0.0 - None - } - 0 => { - // Subnormal number: 2^(-1022-52) * significand. - // We want mantissa to have exactly 11 leading zeros. - let shift = significand.leading_zeros() - 11; - Some((sign, -1022 - 52 - shift as i32, significand << shift)) - } - 0x7ff => { - // Infinity or NaN. - None - } - _ => { - // Normal number: 2^(biased_exponent-1023-52) * (2^52 + significand) - Some((sign, biased_exponent - 1023 - 52, 1 << 52 | significand)) - } - } -} - -/// Returns `2^exp`. -fn pow2(exp: u32) -> f64 { - if exp <= 1023 { f64::from_bits(u64::from(exp + 1023) << 52) } else { f64::INFINITY } -} diff --git a/library/std/src/f64/div_euclid/tests.rs b/library/std/src/f64/div_euclid/tests.rs deleted file mode 100644 index eeb43a6b7d868..0000000000000 --- a/library/std/src/f64/div_euclid/tests.rs +++ /dev/null @@ -1,21 +0,0 @@ -#[test] -fn test_normal_form() { - use crate::f64::div_euclid::normal_form; - - assert_eq!(normal_form(-1.5f64), Some((true, -52, 3 << 51))); - assert_eq!(normal_form(f64::MIN_POSITIVE), Some((false, -1074, 1 << 52))); - assert_eq!(normal_form(f64::MIN_POSITIVE / 2.0), Some((false, -1075, 1 << 52))); - assert_eq!(normal_form(f64::MAX), Some((false, 971, (1 << 53) - 1))); - assert_eq!(normal_form(0.0), None); - assert_eq!(normal_form(f64::INFINITY), None); - assert_eq!(normal_form(f64::NAN), None); -} - -#[test] -fn test_pow2() { - use crate::f64::div_euclid::pow2; - - assert_eq!(pow2(0), 1.0f64); - assert_eq!(pow2(4), 16.0f64); - assert_eq!(pow2(1024), f64::INFINITY); -} diff --git a/library/std/src/f64/soft.rs b/library/std/src/f64/soft.rs new file mode 100644 index 0000000000000..1a68dfdc85c2f --- /dev/null +++ b/library/std/src/f64/soft.rs @@ -0,0 +1,344 @@ +//! Software floating point functions for `f64`. + +#[cfg(test)] +mod tests; + +/// The floating point type. +type FP = f64; + +/// Type of the exponent. +type Exponent = i32; + +/// Type of `FP::to_bits`. +type Bits = u64; + +/// Half as many bits as `Bits`. +type HalfBits = u32; + +/// Twice as many bits as `Bits`. +type DoubleBits = u128; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Shift right that works even if `shift >= Bits::BITS`. +/// +/// Requires `shift >= 0`. +fn safe_shr(a: Bits, shift: Exponent) -> Bits { + if shift < Bits::BITS as Exponent { a >> shift } else { 0 } +} + +/// Returns `2^exp`. +fn pow2(exp: Exponent) -> FP { + let biased_exponent = exp + EXPONENT_BIAS; + if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) { + // Round to 0. + 0.0 + } else if biased_exponent <= 0 { + // Subnormal. + FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1)) + } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 { + // Normal. + FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS) + } else { + // Round to infinity. + FP::INFINITY + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + let q = (aa / bb) as Bits; + q as FP + } else { + // exp >= Bits::BITS + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + + // e > 0 + // The result is `floor((aa << e) / bb)` + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are two cases: + // 1. floor((r << e) / bb) = 0 + // 2. 0 < floor((r << e) / bb) < 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // Case 1 happens when: + // (r << e) / bb < 1 + // (r << e) <= bb - 1 + // r <= ((bb - 1) >> e) + let case_1_bound = safe_shr(b.mantissa - 1, e); + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + }; + q_adj as FP * pow2(e) + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1 + // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX + let q = ((aa - 1) / bb) as Bits + 1; + q as FP + } else { + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + // e > 0 + // The result is ceil((aa << e) / b). + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q < Bits::MAX` as in the previous case. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are three cases: + // 1. ceil((r << e) / bb) = 0 + // 2. 0 < ceil((r << e) / bb) < 2^e + // 3. ceil((r << e) / bb) = 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / bb > (1 << e) - 1 + // (r << e) > (bb << e) - b + // ((bb - r) << e) <= bb - 1 + // bb - r <= (bb - 1) >> e + // r >= bb - ((bb - 1) >> e) + let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e); + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + } else { + // Case 3. + // No overflow because `q < Bits::MAX`. + q + 1 + }; + q_adj as FP * pow2(e) + } +} diff --git a/library/std/src/f64/soft/tests.rs b/library/std/src/f64/soft/tests.rs new file mode 100644 index 0000000000000..c74b2bbebaadb --- /dev/null +++ b/library/std/src/f64/soft/tests.rs @@ -0,0 +1,49 @@ +use crate::f64::soft::{Positive, PositiveFinite, Representation, Sign, pow2}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f64), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -52, mantissa: 3 << 51 }), + }); + + assert_eq!(Representation::new(f64::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -1074, mantissa: 1 << 52 }), + }); + + assert_eq!(Representation::new(f64::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -1075, mantissa: 1 << 52 }), + }); + + assert_eq!(Representation::new(f64::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 971, mantissa: (1 << 53) - 1 }), + }); + + assert_eq!(Representation::new(-0.0f64), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f64::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f64::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} + +#[test] +fn test_pow2() { + assert_eq!(pow2(2), 4.0); + assert_eq!(pow2(-1), 0.5); + assert_eq!(pow2(-1075), 0.0); + assert_eq!(pow2(-1022), f64::MIN_POSITIVE); + assert_eq!(pow2(-1023), f64::MIN_POSITIVE / 2.0); + assert_eq!(pow2(1024), f64::INFINITY); +} diff --git a/library/std/src/f64/tests.rs b/library/std/src/f64/tests.rs index 0499cc4516b4e..b6147326818ea 100644 --- a/library/std/src/f64/tests.rs +++ b/library/std/src/f64/tests.rs @@ -429,6 +429,16 @@ fn test_div_euclid() { assert_eq!(5.0f64.div_euclid(0.0), inf); assert_eq!((-5.0f64).div_euclid(0.0), -inf); + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f64.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f64.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f64).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f64.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f64.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f64).div_euclid(-inf), 1.0); + // 0 / x assert_eq!(Ordering::Equal, 0.0f64.div_euclid(10.0).total_cmp(&0.0)); assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(10.0).total_cmp(&-0.0)); diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index dbbfed619559d..df5d6f6cd3930 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -677,6 +677,7 @@ pub mod alloc; // Private support modules mod panicking; +mod u256; #[path = "../../backtrace/src/lib.rs"] #[allow(dead_code, unused_attributes, fuzzy_provenance_casts, unsafe_op_in_unsafe_fn)] diff --git a/library/std/src/u256.rs b/library/std/src/u256.rs new file mode 100644 index 0000000000000..7094753257dc6 --- /dev/null +++ b/library/std/src/u256.rs @@ -0,0 +1,117 @@ +//! Limited unsigned 256-bit arithmetic for internal use. + +#[cfg(test)] +mod tests; + +use core::ops::{BitOr, BitOrAssign, Div, DivAssign, Shl, ShlAssign, Sub, SubAssign}; + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub(crate) struct U256 { + high: u128, + low: u128, +} + +impl U256 { + pub(crate) const ZERO: Self = Self { high: 0, low: 0 }; + + pub(crate) const ONE: Self = Self { high: 0, low: 1 }; + + /// The number of leading 0 bits. + pub(crate) fn leading_zeros(self) -> u32 { + if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() } + } + + /// Returns (quotient, remainder). + pub(crate) fn div_rem(mut self, rhs: Self) -> (Self, Self) { + assert!(rhs != Self::ZERO); + let shift_limit = (rhs.leading_zeros() + 1).saturating_sub(self.leading_zeros()); + let mut q = Self::ZERO; + for shift in (0..shift_limit).rev() { + let rhs_shifted = rhs << shift; + if self >= rhs_shifted { + q |= Self::ONE << shift; + self -= rhs_shifted; + } + } + (q, self) + } + + /// Wrap to `u128`. + pub(crate) fn wrap_u128(self) -> u128 { + self.low + } +} + +impl From for U256 { + fn from(value: u128) -> Self { + Self { high: 0, low: value } + } +} + +impl Shl for U256 { + type Output = Self; + + fn shl(self, rhs: u32) -> Self { + if rhs == 0 { + self + } else if rhs < 128 { + Self { high: self.high << rhs | self.low >> (128 - rhs), low: self.low << rhs } + } else if rhs == 128 { + Self { high: self.low, low: 0 } + } else if rhs < 256 { + Self { high: self.low << (rhs - 128), low: 0 } + } else { + Self::ZERO + } + } +} + +impl ShlAssign for U256 { + fn shl_assign(&mut self, rhs: u32) { + *self = *self << rhs; + } +} + +impl Sub for U256 { + type Output = Self; + + fn sub(self, rhs: U256) -> Self { + let (low, borrow) = self.low.overflowing_sub(rhs.low); + let (high, _) = self.high.borrowing_sub(rhs.high, borrow); + Self { low, high } + } +} + +impl SubAssign for U256 { + fn sub_assign(&mut self, rhs: U256) { + *self = *self - rhs; + } +} + +impl Div for U256 { + type Output = Self; + + fn div(self, rhs: U256) -> Self { + self.div_rem(rhs).0 + } +} + +impl DivAssign for U256 { + fn div_assign(&mut self, rhs: U256) { + *self = *self / rhs; + } +} + +impl BitOr for U256 { + type Output = Self; + + fn bitor(self, rhs: U256) -> Self { + Self { high: self.high | rhs.high, low: self.low | rhs.low } + } +} + +impl BitOrAssign for U256 { + fn bitor_assign(&mut self, rhs: U256) { + *self = *self | rhs; + } +} diff --git a/library/std/src/u256/tests.rs b/library/std/src/u256/tests.rs new file mode 100644 index 0000000000000..0466b519d6f5d --- /dev/null +++ b/library/std/src/u256/tests.rs @@ -0,0 +1,66 @@ +use crate::u256::U256; + +#[test] +fn test_leading_zeros() { + assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256); + assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116); + assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252); +} + +#[test] +fn test_wrap_u128() { + assert_eq!(U256 { high: 3, low: 4 }.wrap_u128(), 4); +} + +#[test] +fn test_from_u128() { + assert_eq!(U256::from(7u128), U256 { high: 0, low: 7 }); +} + +#[test] +fn test_shl() { + let a = U256 { high: 0x1234, low: 0x0123456789abcdef0123456789abcdef }; + + assert_eq!(a << 0, a); + assert_eq!(a << 16, U256 { high: 0x12340123, low: 0x456789abcdef0123456789abcdef0000 }); + assert_eq!(a << 128, U256 { high: 0x0123456789abcdef0123456789abcdef, low: 0 }); + assert_eq!(a << 140, U256 { high: 0x3456789abcdef0123456789abcdef000, low: 0 }); + assert_eq!(a << 256, U256::ZERO); + assert_eq!(a << 1000, U256::ZERO); +} + +#[test] +fn test_sub() { + let a = U256 { high: 7, low: 10 }; + let b = U256 { high: 3, low: 2 }; + let c = U256 { high: 3, low: 15 }; + assert_eq!(a - b, U256 { high: 4, low: 8 }); + assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 }); +} + +#[test] +fn test_div_rem() { + let a = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef }; + let b = U256 { high: 0, low: 0x987 }; + let c = U256 { high: 0x13, low: 0x9876543210abc }; + assert_eq!( + a.div_rem(b), + (U256 { high: 0x1208, low: 0x63d18447855b62f52e9a6983a9e22813 }, U256 { + high: 0, + low: 0xea + }) + ); + assert_eq!( + a.div_rem(c), + (U256 { high: 0, low: 0x90ad6 }, U256 { + high: 0xd, + low: 0x0123456789abcd98d752c5f8c1057cc7 + },) + ); + assert_eq!(c.div_rem(a), (U256::ZERO, c)); +} + +#[test] +fn test_div() { + assert_eq!(U256::from(47u128) / U256::from(10u128), U256::from(4u128)); +} From 009cf4b8f8f855318bcd7dde209110c166eb8145 Mon Sep 17 00:00:00 2001 From: Tomek Czajka Date: Thu, 12 Dec 2024 11:15:14 +0100 Subject: [PATCH 4/5] div_euclid benchmarks --- library/std/benches/f128/mod.rs | 18 ++++++++++++++++++ library/std/benches/f16/mod.rs | 13 +++++++++++++ library/std/benches/f32/mod.rs | 18 ++++++++++++++++++ library/std/benches/f64/mod.rs | 18 ++++++++++++++++++ library/std/benches/lib.rs | 6 ++++++ 5 files changed, 73 insertions(+) create mode 100644 library/std/benches/f128/mod.rs create mode 100644 library/std/benches/f16/mod.rs create mode 100644 library/std/benches/f32/mod.rs create mode 100644 library/std/benches/f64/mod.rs diff --git a/library/std/benches/f128/mod.rs b/library/std/benches/f128/mod.rs new file mode 100644 index 0000000000000..08ca8b3dd005c --- /dev/null +++ b/library/std/benches/f128/mod.rs @@ -0,0 +1,18 @@ +use core::f128::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(10000.12345f128).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_medium(b: &mut Bencher) { + b.iter(|| black_box(1.123e30f128).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(1.123e4000f128).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/f16/mod.rs b/library/std/benches/f16/mod.rs new file mode 100644 index 0000000000000..e0d33df31ef78 --- /dev/null +++ b/library/std/benches/f16/mod.rs @@ -0,0 +1,13 @@ +use core::f16::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(20.12f16).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(50000.0f16).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/f32/mod.rs b/library/std/benches/f32/mod.rs new file mode 100644 index 0000000000000..7b45e28e09c48 --- /dev/null +++ b/library/std/benches/f32/mod.rs @@ -0,0 +1,18 @@ +use core::f32::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(130.12345f32).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_medium(b: &mut Bencher) { + b.iter(|| black_box(1.123e7f32).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(1.123e32f32).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/f64/mod.rs b/library/std/benches/f64/mod.rs new file mode 100644 index 0000000000000..fae017b69d93d --- /dev/null +++ b/library/std/benches/f64/mod.rs @@ -0,0 +1,18 @@ +use core::f64::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(1234.1234578f64).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_medium(b: &mut Bencher) { + b.iter(|| black_box(1.123e15f64).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(1.123e300f64).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/lib.rs b/library/std/benches/lib.rs index 1b21c230a0bf2..7497c65490d97 100644 --- a/library/std/benches/lib.rs +++ b/library/std/benches/lib.rs @@ -1,7 +1,13 @@ // Disabling in Miri as these would take too long. #![cfg(not(miri))] #![feature(test)] +#![feature(f16)] +#![feature(f128)] extern crate test; +mod f128; +mod f16; +mod f32; +mod f64; mod hash; From d0f8bae821930eb0a4e34b0326f36bb3ff374ef5 Mon Sep 17 00:00:00 2001 From: Tomek Czajka Date: Thu, 12 Dec 2024 11:36:05 +0100 Subject: [PATCH 5/5] div_euclid: add a fast path for overflow --- library/std/src/f128/soft.rs | 12 ++++++++++-- library/std/src/f32/soft.rs | 12 ++++++++++-- library/std/src/f64/soft.rs | 12 ++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/library/std/src/f128/soft.rs b/library/std/src/f128/soft.rs index 30b6e82e18086..86befe8dd92d0 100644 --- a/library/std/src/f128/soft.rs +++ b/library/std/src/f128/soft.rs @@ -201,7 +201,7 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. let q: Bits = (aa / bb).wrap_u128(); q as FP - } else { + } else if exp <= FP::MAX_EXP { // exp >= Bits::BITS let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); let bb = DoubleBits::from(b.mantissa); @@ -256,6 +256,10 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { q | 1 }; q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY } } @@ -282,7 +286,7 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX let q = ((aa - U256::ONE) / bb).wrap_u128() + 1; q as FP - } else { + } else if exp <= FP::MAX_EXP { let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); let bb = DoubleBits::from(b.mantissa); // e > 0 @@ -346,5 +350,9 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { q + 1 }; q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY } } diff --git a/library/std/src/f32/soft.rs b/library/std/src/f32/soft.rs index 9a280893eccf6..f5af4561881ad 100644 --- a/library/std/src/f32/soft.rs +++ b/library/std/src/f32/soft.rs @@ -198,7 +198,7 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. let q = (aa / bb) as Bits; q as FP - } else { + } else if exp <= FP::MAX_EXP { // exp >= Bits::BITS let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); let bb = DoubleBits::from(b.mantissa); @@ -252,6 +252,10 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { q | 1 }; q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY } } @@ -277,7 +281,7 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX let q = ((aa - 1) / bb) as Bits + 1; q as FP - } else { + } else if exp <= FP::MAX_EXP { let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); let bb = DoubleBits::from(b.mantissa); // e > 0 @@ -340,5 +344,9 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { q + 1 }; q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY } } diff --git a/library/std/src/f64/soft.rs b/library/std/src/f64/soft.rs index 1a68dfdc85c2f..2106a25aaa2c7 100644 --- a/library/std/src/f64/soft.rs +++ b/library/std/src/f64/soft.rs @@ -198,7 +198,7 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. let q = (aa / bb) as Bits; q as FP - } else { + } else if exp <= FP::MAX_EXP { // exp >= Bits::BITS let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); let bb = DoubleBits::from(b.mantissa); @@ -252,6 +252,10 @@ fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { q | 1 }; q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY } } @@ -277,7 +281,7 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX let q = ((aa - 1) / bb) as Bits + 1; q as FP - } else { + } else if exp <= FP::MAX_EXP { let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); let bb = DoubleBits::from(b.mantissa); // e > 0 @@ -340,5 +344,9 @@ fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { q + 1 }; q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY } }