diff --git a/library/std/benches/f128/mod.rs b/library/std/benches/f128/mod.rs new file mode 100644 index 0000000000000..08ca8b3dd005c --- /dev/null +++ b/library/std/benches/f128/mod.rs @@ -0,0 +1,18 @@ +use core::f128::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(10000.12345f128).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_medium(b: &mut Bencher) { + b.iter(|| black_box(1.123e30f128).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(1.123e4000f128).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/f16/mod.rs b/library/std/benches/f16/mod.rs new file mode 100644 index 0000000000000..e0d33df31ef78 --- /dev/null +++ b/library/std/benches/f16/mod.rs @@ -0,0 +1,13 @@ +use core::f16::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(20.12f16).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(50000.0f16).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/f32/mod.rs b/library/std/benches/f32/mod.rs new file mode 100644 index 0000000000000..7b45e28e09c48 --- /dev/null +++ b/library/std/benches/f32/mod.rs @@ -0,0 +1,18 @@ +use core::f32::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(130.12345f32).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_medium(b: &mut Bencher) { + b.iter(|| black_box(1.123e7f32).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(1.123e32f32).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/f64/mod.rs b/library/std/benches/f64/mod.rs new file mode 100644 index 0000000000000..fae017b69d93d --- /dev/null +++ b/library/std/benches/f64/mod.rs @@ -0,0 +1,18 @@ +use core::f64::consts::PI; + +use test::{Bencher, black_box}; + +#[bench] +fn div_euclid_small(b: &mut Bencher) { + b.iter(|| black_box(1234.1234578f64).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_medium(b: &mut Bencher) { + b.iter(|| black_box(1.123e15f64).div_euclid(black_box(PI))); +} + +#[bench] +fn div_euclid_large(b: &mut Bencher) { + b.iter(|| black_box(1.123e300f64).div_euclid(black_box(PI))); +} diff --git a/library/std/benches/lib.rs b/library/std/benches/lib.rs index 1b21c230a0bf2..7497c65490d97 100644 --- a/library/std/benches/lib.rs +++ b/library/std/benches/lib.rs @@ -1,7 +1,13 @@ // Disabling in Miri as these would take too long. #![cfg(not(miri))] #![feature(test)] +#![feature(f16)] +#![feature(f128)] extern crate test; +mod f128; +mod f16; +mod f32; +mod f64; mod hash; diff --git a/library/std/src/f128.rs b/library/std/src/f128.rs index e93e915159e40..a206ef8fa47c9 100644 --- a/library/std/src/f128.rs +++ b/library/std/src/f128.rs @@ -4,6 +4,8 @@ //! //! Mathematically significant numbers are provided in the `consts` sub-module. +mod soft; + #[cfg(test)] mod tests; @@ -235,10 +237,14 @@ impl f128 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -264,29 +270,23 @@ impl f128 { #[unstable(feature = "f128", issue = "116909")] #[must_use = "method returns a new number and does not mutate the original value"] pub fn div_euclid(self, rhs: f128) -> f128 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f128/soft.rs b/library/std/src/f128/soft.rs new file mode 100644 index 0000000000000..86befe8dd92d0 --- /dev/null +++ b/library/std/src/f128/soft.rs @@ -0,0 +1,358 @@ +//! Software floating point functions for `f128`. + +#[cfg(test)] +mod tests; + +use crate::u256::U256; + +/// The floating point type. +type FP = f128; + +/// Type of the exponent. +type Exponent = i32; + +/// Type of `FP::to_bits`. +type Bits = u128; + +/// Half as many bits as `Bits`. +type HalfBits = u64; + +/// Twice as many bits as `Bits`. +type DoubleBits = U256; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Shift right that works even if `shift >= Bits::BITS`. +/// +/// Requires `shift >= 0`. +fn safe_shr(a: Bits, shift: Exponent) -> Bits { + if shift < Bits::BITS as Exponent { a >> shift } else { 0 } +} + +/// Returns `2^exp`. +fn pow2(exp: Exponent) -> FP { + let biased_exponent = exp + EXPONENT_BIAS; + if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) { + // Round to 0. + 0.0 + } else if biased_exponent <= 0 { + // Subnormal. + FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1)) + } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 { + // Normal. + FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS) + } else { + // Round to infinity. + FP::INFINITY + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + // We have to use `as` because `From for f128` is not yet implemented. + q as f128 + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp as u32; + let bb = DoubleBits::from(b.mantissa); + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + let q: Bits = (aa / bb).wrap_u128(); + q as FP + } else if exp <= FP::MAX_EXP { + // exp >= Bits::BITS + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + + // e > 0 + // The result is `floor((aa << e) / bb)` + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + // 0 <= r < b + let (q, r) = aa.div_rem(bb); + let q: Bits = q.wrap_u128(); + let r: Bits = r.wrap_u128(); + + // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are two cases: + // 1. floor((r << e) / bb) = 0 + // 2. 0 < floor((r << e) / bb) < 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // Case 1 happens when: + // (r << e) / bb < 1 + // (r << e) <= bb - 1 + // r <= ((bb - 1) >> e) + let case_1_bound = safe_shr(b.mantissa - 1, e); + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + }; + q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + // We have to use `as` because `From for f128` is not yet implemented. + q as f128 + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp as u32; + let bb = DoubleBits::from(b.mantissa); + // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1 + // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX + let q = ((aa - U256::ONE) / bb).wrap_u128() + 1; + q as FP + } else if exp <= FP::MAX_EXP { + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + // e > 0 + // The result is ceil((aa << e) / b). + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q < Bits::MAX` as in the previous case. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + // 0 <= r < b + let (q, r) = aa.div_rem(bb); + let q: Bits = q.wrap_u128(); + let r: Bits = r.wrap_u128(); + + // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are three cases: + // 1. ceil((r << e) / bb) = 0 + // 2. 0 < ceil((r << e) / bb) < 2^e + // 3. ceil((r << e) / bb) = 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / bb > (1 << e) - 1 + // (r << e) > (bb << e) - b + // ((bb - r) << e) <= bb - 1 + // bb - r <= (bb - 1) >> e + // r >= bb - ((bb - 1) >> e) + let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e); + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + } else { + // Case 3. + // No overflow because `q < Bits::MAX`. + q + 1 + }; + q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} diff --git a/library/std/src/f128/soft/tests.rs b/library/std/src/f128/soft/tests.rs new file mode 100644 index 0000000000000..2156e3fd8448a --- /dev/null +++ b/library/std/src/f128/soft/tests.rs @@ -0,0 +1,49 @@ +use crate::f128::soft::{Positive, PositiveFinite, Representation, Sign, pow2}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f128), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -112, mantissa: 3 << 111 }), + }); + + assert_eq!(Representation::new(f128::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -16494, mantissa: 1 << 112 }), + }); + + assert_eq!(Representation::new(f128::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -16495, mantissa: 1 << 112 }), + }); + + assert_eq!(Representation::new(f128::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 16271, mantissa: (1 << 113) - 1 }), + }); + + assert_eq!(Representation::new(-0.0f128), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f128::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f128::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} + +#[test] +fn test_pow2() { + assert_eq!(pow2(2), 4.0); + assert_eq!(pow2(-1), 0.5); + assert_eq!(pow2(-16495), 0.0); + assert_eq!(pow2(-16382), f128::MIN_POSITIVE); + assert_eq!(pow2(-16383), f128::MIN_POSITIVE / 2.0); + assert_eq!(pow2(16384), f128::INFINITY); +} diff --git a/library/std/src/f128/tests.rs b/library/std/src/f128/tests.rs index cbcf9f96239bb..667e4dc68bc30 100644 --- a/library/std/src/f128/tests.rs +++ b/library/std/src/f128/tests.rs @@ -460,7 +460,130 @@ fn test_mul_add() { } #[test] -#[cfg(reliable_f16_math)] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f128::NAN; + let inf = f128::INFINITY; + let largest_subnormal = f128::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f128.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f128.div_euclid(0.0), inf); + assert_eq!((-5.0f128).div_euclid(0.0), -inf); + + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f128.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f128.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f128).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f128.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f128.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f128).div_euclid(-inf), 1.0); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f128.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f128).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f128.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f128).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f128.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f128).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f128.div_euclid(10.0), 2.0); + assert_eq!((-20.0f128).div_euclid(10.0), -2.0); + assert_eq!(20.0f128.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f128).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f128.div_euclid(10.0), 2.0); + assert_eq!((-23.0f128).div_euclid(10.0), -3.0); + assert_eq!(23.0f128.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f128).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Medium results. + assert_eq!(1e9f128.div_euclid(10.0), 1e8); + assert_eq!((-1e9f128).div_euclid(10.0), -1e8); + assert_eq!(1e9f128.div_euclid(-10.0), -1e8); + assert_eq!((-1e9f128).div_euclid(-10.0), 1e8); + + assert_eq!(1_000_000_003.0f128.div_euclid(10.0), 100_000_000.0); + assert_eq!((-1_000_000_003.0f128).div_euclid(10.0), -100_000_001.0); + assert_eq!(1_000_000_003.0f128.div_euclid(-10.0), -100_000_000.0); + assert_eq!((-1_000_000_003.0f128).div_euclid(-10.0), 100_000_001.0); + + // Infinite results. + assert_eq!(1e4000f128.div_euclid(1e-4000), inf); + assert_eq!((-1e4000f128).div_euclid(1e-4000), -inf); + assert_eq!(1e4000f128.div_euclid(-1e-4000), -inf); + assert_eq!((-1e4000f128).div_euclid(-1e-4000), inf); + + // Large results, exactly divisible. + assert_eq!(1e4000f128.div_euclid(2.0), 0.5e4000); + assert_eq!((-1e4000f128).div_euclid(2.0), -0.5e4000); + assert_eq!(1e4000f128.div_euclid(-2.0), -0.5e4000); + assert_eq!((-1e4000f128).div_euclid(-2.0), 0.5e4000); + + // Large results, not divisible. + let a = 4281743078117879643174857908348485409148239872f128; // 3 * 2^150 + let b = 7f128; + // a / b = 0x1b6db6db6db6db6db6db6db6db6db6db6db6db.6d... + // = 611677582588268520453551129764069344164034267.42... + // floor(a / b) = 0x1b6db6db6db6db6db6db6db6db6db6db6db6db + // ceil(a / b) = 0x1b6db6db6db6db6db6db6db6db6db6db6db6dc + // Both round to 0x1b6db6db6db6db6db6db6db6db6db000000000 + // = 611677582588268520453551129764069314712829952 + assert_eq!(a.div_euclid(b), 611677582588268520453551129764069314712829952f128); + assert_eq!((-a).div_euclid(-b), 611677582588268520453551129764069314712829952f128); + + // Large results, test precise rounding edge cases. + + // a = 0x3abab02be95086e38544e68b3c78a0000000000000000000000000000000000 + // b = 0x10d3b8a2cc1076df4f04ec48a0a4e + let a = 1660249666442741415340463590408472308581124489457651197969883177321645473792f128; + let b = 5460685408006286130247220436011598f128; + + // a / b = 0x37d7bea0c08a6e5c00f664cda8aa1000000.be... + // = 304036863945418882481381401039785785556992.74... + // floor(a / b) = 0x37d7bea0c08a6e5c00f664cda8aa1000000 + // Rounds down to 0x37d7bea0c08a6e5c00f664cda8aa0000000 + // = 304036863945418882481381401039785785556992 + assert_eq!(a.div_euclid(b), 304036863945418882481381401039785785556992f128); + + // ceil(a / b) = 0x37d7bea0c08a6e5c00f664cda8aa1000001 + // Rounds up to 0x37d7bea0c08a6e5c00f664cda8aa2000000 + // = 304036863945418882481381401039785802334208 + assert_eq!((-a).div_euclid(-b), 304036863945418882481381401039785802334208f128); + + // a = 0x36625c03bcfd7d822e780b258e6ec0000000000000000000000000000000000 + // b = 0x166d7391fbd4d59a891d674eb11f3 + let a = 1537417493580712628196032571713889096389680551356671222166298628797042262016f128; + let b = 7278154372064079194208203952558579f128; + + // a / b = 0x26cc5433ff626fe2bbc14362e154affffff.5f... + // = 211237274587334998320332278178386764890111.37... + // floor(a / b) = 0x26cc5433ff626fe2bbc14362e154affffff + // Rounds down to 0x26cc5433ff626fe2bbc14362e154a000000 + // = 211237274587334998320332278178386748112896 + assert_eq!(a.div_euclid(b), 211237274587334998320332278178386748112896f128); + + // ceil(a / b) = 0x26cc5433ff626fe2bbc14362e154b000000 + // Rounds up to 0x26cc5433ff626fe2bbc14362e154c000000 + // = 211237274587334998320332278178386781667328 + assert_eq!((-a).div_euclid(-b), 211237274587334998320332278178386781667328f128); +} + +#[test] +#[cfg(reliable_f128_math)] fn test_recip() { let nan: f128 = f128::NAN; let inf: f128 = f128::INFINITY; diff --git a/library/std/src/f16.rs b/library/std/src/f16.rs index 5b7fcaa28e064..00095b740ed80 100644 --- a/library/std/src/f16.rs +++ b/library/std/src/f16.rs @@ -4,6 +4,8 @@ //! //! Mathematically significant numbers are provided in the `consts` sub-module. +mod soft; + #[cfg(test)] mod tests; @@ -235,10 +237,14 @@ impl f16 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -264,29 +270,23 @@ impl f16 { #[unstable(feature = "f16", issue = "116909")] #[must_use = "method returns a new number and does not mutate the original value"] pub fn div_euclid(self, rhs: f16) -> f16 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f16/soft.rs b/library/std/src/f16/soft.rs new file mode 100644 index 0000000000000..2ebd1bed1ef9b --- /dev/null +++ b/library/std/src/f16/soft.rs @@ -0,0 +1,213 @@ +//! Software floating point functions for `f16`. + +#[cfg(test)] +mod tests; + +/// The floating point type. +type FP = f16; + +/// Type of the exponent. +type Exponent = i16; + +/// Type of `FP::to_bits`. +type Bits = u16; + +/// Half as many bits as `Bits`. +type HalfBits = u8; + +/// Twice as many bits as `Bits`. +type DoubleBits = u32; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP as Exponent; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + // We have to use `as` because `From for f16` is not yet implemented. + q as f16 + } else if exp <= FP::MAX_EXP as Exponent { + // Verify that `aa` fits in `DoubleBits`. + const _: () = assert!(FP::MAX_EXP as u32 <= Bits::BITS); + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + let q = aa / bb; + q as FP + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + // We have to use `as` because `From for f16` is not yet implemented. + q as f16 + } else if exp <= FP::MAX_EXP as Exponent { + // Verify that `aa` fits in `DoubleBits`. + const _: () = assert!(FP::MAX_EXP as u32 <= Bits::BITS); + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + let q = (aa - 1) / bb + 1; + q as FP + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} diff --git a/library/std/src/f16/soft/tests.rs b/library/std/src/f16/soft/tests.rs new file mode 100644 index 0000000000000..ce277a20771b5 --- /dev/null +++ b/library/std/src/f16/soft/tests.rs @@ -0,0 +1,39 @@ +use crate::f16::soft::{Positive, PositiveFinite, Representation, Sign}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f16), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -10, mantissa: 0x600 }), + }); + + assert_eq!(Representation::new(f16::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -24, mantissa: 0x400 }), + }); + + assert_eq!(Representation::new(f16::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -25, mantissa: 0x400 }), + }); + + assert_eq!(Representation::new(f16::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 5, mantissa: 0x7ff }), + }); + + assert_eq!(Representation::new(-0.0), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f16::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f16::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} diff --git a/library/std/src/f16/tests.rs b/library/std/src/f16/tests.rs index 684ee3f3855b8..141f07e54a2cf 100644 --- a/library/std/src/f16/tests.rs +++ b/library/std/src/f16/tests.rs @@ -442,6 +442,76 @@ fn test_mul_add() { assert_eq!((-3.2f16).mul_add(2.4, neg_inf), neg_inf); } +#[test] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f16::NAN; + let inf = f16::INFINITY; + let largest_subnormal = f16::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f16.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f16.div_euclid(0.0), inf); + assert_eq!((-5.0f16).div_euclid(0.0), -inf); + + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f16.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f16.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f16).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f16.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f16.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f16).div_euclid(-inf), 1.0); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f16.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f16).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f16.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f16).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f16.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f16).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f16.div_euclid(10.0), 2.0); + assert_eq!((-20.0f16).div_euclid(10.0), -2.0); + assert_eq!(20.0f16.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f16).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f16.div_euclid(10.0), 2.0); + assert_eq!((-23.0f16).div_euclid(10.0), -3.0); + assert_eq!(23.0f16.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f16).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Large results, exactly divisible. + assert_eq!(1e4f16.div_euclid(2.0), 0.5e4); + assert_eq!((-1e4f16).div_euclid(2.0), -0.5e4); + assert_eq!(1e4f16.div_euclid(-2.0), -0.5e4); + assert_eq!((-1e4f16).div_euclid(-2.0), 0.5e4); + + // Large results, not divisible. + assert_eq!(1003.0f16.div_euclid(10.0), 100.0); + assert_eq!((-1003.0f16).div_euclid(10.0), -101.0); + assert_eq!(1003.0f16.div_euclid(-10.0), -100.0); + assert_eq!((-1003.0f16).div_euclid(-10.0), 101.0); + + // Infinite results. + assert_eq!(1000f16.div_euclid(0.001f16), inf); + assert_eq!((-1000f16).div_euclid(0.001f16), -inf); + assert_eq!(1000f16.div_euclid(-0.001f16), -inf); + assert_eq!((-1000f16).div_euclid(-0.001f16), inf); +} + #[test] #[cfg(reliable_f16_math)] fn test_recip() { diff --git a/library/std/src/f32.rs b/library/std/src/f32.rs index 7cb285bbff5f7..6207177768f90 100644 --- a/library/std/src/f32.rs +++ b/library/std/src/f32.rs @@ -12,6 +12,8 @@ #![stable(feature = "rust1", since = "1.0.0")] #![allow(missing_docs)] +mod soft; + #[cfg(test)] mod tests; @@ -219,10 +221,14 @@ impl f32 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -244,29 +250,23 @@ impl f32 { #[inline] #[stable(feature = "euclidean_division", since = "1.38.0")] pub fn div_euclid(self, rhs: f32) -> f32 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f32/soft.rs b/library/std/src/f32/soft.rs new file mode 100644 index 0000000000000..f5af4561881ad --- /dev/null +++ b/library/std/src/f32/soft.rs @@ -0,0 +1,352 @@ +//! Software floating point functions for `f32`. + +#[cfg(test)] +mod tests; + +/// The floating point type. +type FP = f32; + +/// Type of the exponent. +type Exponent = i32; + +/// Type of `FP::to_bits`. +type Bits = u32; + +/// Half as many bits as `Bits`. +type HalfBits = u16; + +/// Twice as many bits as `Bits`. +type DoubleBits = u64; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Shift right that works even if `shift >= Bits::BITS`. +/// +/// Requires `shift >= 0`. +fn safe_shr(a: Bits, shift: Exponent) -> Bits { + if shift < Bits::BITS as Exponent { a >> shift } else { 0 } +} + +/// Returns `2^exp`. +fn pow2(exp: Exponent) -> FP { + let biased_exponent = exp + EXPONENT_BIAS; + if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) { + // Round to 0. + 0.0 + } else if biased_exponent <= 0 { + // Subnormal. + FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1)) + } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 { + // Normal. + FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS) + } else { + // Round to infinity. + FP::INFINITY + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + let q = (aa / bb) as Bits; + q as FP + } else if exp <= FP::MAX_EXP { + // exp >= Bits::BITS + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + + // e > 0 + // The result is `floor((aa << e) / bb)` + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are two cases: + // 1. floor((r << e) / bb) = 0 + // 2. 0 < floor((r << e) / bb) < 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // Case 1 happens when: + // (r << e) / bb < 1 + // (r << e) <= bb - 1 + // r <= ((bb - 1) >> e) + let case_1_bound = safe_shr(b.mantissa - 1, e); + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + }; + q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1 + // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX + let q = ((aa - 1) / bb) as Bits + 1; + q as FP + } else if exp <= FP::MAX_EXP { + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + // e > 0 + // The result is ceil((aa << e) / b). + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q < Bits::MAX` as in the previous case. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are three cases: + // 1. ceil((r << e) / bb) = 0 + // 2. 0 < ceil((r << e) / bb) < 2^e + // 3. ceil((r << e) / bb) = 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / bb > (1 << e) - 1 + // (r << e) > (bb << e) - b + // ((bb - r) << e) <= bb - 1 + // bb - r <= (bb - 1) >> e + // r >= bb - ((bb - 1) >> e) + let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e); + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + } else { + // Case 3. + // No overflow because `q < Bits::MAX`. + q + 1 + }; + q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} diff --git a/library/std/src/f32/soft/tests.rs b/library/std/src/f32/soft/tests.rs new file mode 100644 index 0000000000000..e7b7affab9ab5 --- /dev/null +++ b/library/std/src/f32/soft/tests.rs @@ -0,0 +1,49 @@ +use crate::f32::soft::{Positive, PositiveFinite, Representation, Sign, pow2}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f32), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -23, mantissa: 0xc00000 }), + }); + + assert_eq!(Representation::new(f32::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -149, mantissa: 0x800000 }), + }); + + assert_eq!(Representation::new(f32::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -150, mantissa: 0x800000 }), + }); + + assert_eq!(Representation::new(f32::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 104, mantissa: 0xffffff }), + }); + + assert_eq!(Representation::new(-0.0), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f32::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f32::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} + +#[test] +fn test_pow2() { + assert_eq!(pow2(2), 4.0); + assert_eq!(pow2(-1), 0.5); + assert_eq!(pow2(-150), 0.0); + assert_eq!(pow2(-126), f32::MIN_POSITIVE); + assert_eq!(pow2(-127), f32::MIN_POSITIVE / 2.0); + assert_eq!(pow2(128), f32::INFINITY); +} diff --git a/library/std/src/f32/tests.rs b/library/std/src/f32/tests.rs index 99cfcfb231dad..1bdc80a4f37b8 100644 --- a/library/std/src/f32/tests.rs +++ b/library/std/src/f32/tests.rs @@ -423,6 +423,122 @@ fn test_mul_add() { assert_eq!((-3.2f32).mul_add(2.4, neg_inf), neg_inf); } +#[test] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f32::NAN; + let inf = f32::INFINITY; + let largest_subnormal = f32::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f32.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f32.div_euclid(0.0), inf); + assert_eq!((-5.0f32).div_euclid(0.0), -inf); + + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f32.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f32.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f32).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f32.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f32.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f32).div_euclid(-inf), 1.0); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f32.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f32).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f32.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f32).div_euclid(10.0), -1.0); + println!("{}", 1.0f32.div_euclid(-10.0)); + assert_eq!(Ordering::Equal, 1.0f32.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f32).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f32.div_euclid(10.0), 2.0); + assert_eq!((-20.0f32).div_euclid(10.0), -2.0); + assert_eq!(20.0f32.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f32).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f32.div_euclid(10.0), 2.0); + assert_eq!((-23.0f32).div_euclid(10.0), -3.0); + assert_eq!(23.0f32.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f32).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Medium results. + assert_eq!(1000000.0f32.div_euclid(10.0), 100000.0); + assert_eq!((-1000000.0f32).div_euclid(10.0), -100000.0); + assert_eq!(1000000.0f32.div_euclid(-10.0), -100000.0); + assert_eq!((-1000000.0f32).div_euclid(-10.0), 100000.0); + + assert_eq!(1000003.0f32.div_euclid(10.0), 100000.0); + assert_eq!((-1000003.0f32).div_euclid(10.0), -100001.0); + assert_eq!(1000003.0f32.div_euclid(-10.0), -100000.0); + assert_eq!((-1000003.0f32).div_euclid(-10.0), 100001.0); + + // Infinite results. + assert_eq!(1e30f32.div_euclid(1e-30), inf); + assert_eq!((-1e30f32).div_euclid(1e-30), -inf); + assert_eq!(1e30f32.div_euclid(-1e-30), -inf); + assert_eq!((-1e30f32).div_euclid(-1e-30), inf); + + // Large results, exactly divisible. + assert_eq!(1e30f32.div_euclid(2.0), 0.5e30); + assert_eq!((-1e30f32).div_euclid(2.0), -0.5e30); + assert_eq!(1e30f32.div_euclid(-2.0), -0.5e30); + assert_eq!((-1e30f32).div_euclid(-2.0), 0.5e30); + + // Large results, not divisible. + let a = 3377699720527872f32; // 3 * 2^50 + let b = 7f32; + // a / b = 0x1b6db6db6db6d.b6... = 482528531503981.71... + // floor(a / b) = 0x1b6db6db6db6d + // ceil(a / b) = 0x1b6db6db6db6e + // Both round to 0x1b6db6e000000 = 482528536297472 + assert_eq!(a.div_euclid(b), 482528536297472f32); + assert_eq!((-a).div_euclid(-b), 482528536297472f32); + + // Large results, test precise rounding edge cases. + + // a = 0x7dccd9000000000 = 566553671500824576 + // b = 0xed5c1c = 5555612 + let a = 566553671500824576f32; + let b = 15555612f32; + + // a / b = 0x87adf0800.c9... = 36421175296.78... + // floor(a / b) = 0x87adf0800 + // Rounds down to 0x87adf0000 = 36421173248 + assert_eq!(a.div_euclid(b), 36421173248f32); + + // ceil(a / b) = 0x87adf0801 + // Rounds up to 0x87adf1000 = 36421177344 + assert_eq!((-a).div_euclid(-b), 36421177344f32); + + // a = 0x7a83d5800000000 = 551758402519302144 + // b = 0x868b0f = 8817423 + let a = 551758402519302144f32; + let b = 8817423f32; + + // a / b = 0xe91d0d7ff.03... = 62575925247.01... + // floor(a / b) = 0xe91d0d7ff + // Rounds down to 0xe91d0d000 = 62575923200 + assert_eq!(a.div_euclid(b), 62575923200f32); + + // ceil(a / b) = 0xe91d0d800 + // Rounds up to 0xe91d0e000 = 62575927296 + assert_eq!((-a).div_euclid(-b), 62575927296f32); +} + #[test] fn test_recip() { let nan: f32 = f32::NAN; diff --git a/library/std/src/f64.rs b/library/std/src/f64.rs index 47163c272de32..751c77faff1a5 100644 --- a/library/std/src/f64.rs +++ b/library/std/src/f64.rs @@ -12,6 +12,8 @@ #![stable(feature = "rust1", since = "1.0.0")] #![allow(missing_docs)] +mod soft; + #[cfg(test)] mod tests; @@ -219,10 +221,14 @@ impl f64 { /// Calculates Euclidean division, the matching method for `rem_euclid`. /// - /// This computes the integer `n` such that + /// In infinite precision this computes the integer `n` such that /// `self = n * rhs + self.rem_euclid(rhs)`. - /// In other words, the result is `self / rhs` rounded to the integer `n` - /// such that `self >= n * rhs`. + /// In other words, the result is `self / rhs` rounded to the + /// integer `n` such that `self >= n * rhs`. + /// + /// However, due to a floating point round-off error the result can be rounded if + /// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented + /// exactly. /// /// # Precision /// @@ -244,29 +250,23 @@ impl f64 { #[inline] #[stable(feature = "euclidean_division", since = "1.38.0")] pub fn div_euclid(self, rhs: f64) -> f64 { - let q = (self / rhs).trunc(); - if self % rhs < 0.0 { - return if rhs > 0.0 { q - 1.0 } else { q + 1.0 }; - } - q + soft::div_euclid(self, rhs) } /// Calculates the least nonnegative remainder of `self (mod rhs)`. /// - /// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in - /// most cases. However, due to a floating point round-off error it can - /// result in `r == rhs.abs()`, violating the mathematical definition, if - /// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. - /// This result is not an element of the function's codomain, but it is the - /// closest floating point number in the real numbers and thus fulfills the - /// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)` - /// approximately. + /// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`. + /// + /// However, due to a floating point round-off error the result can round to + /// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`. /// /// # Precision /// /// The result of this operation is guaranteed to be the rounded /// infinite-precision result. /// + /// The result is precise when `self >= 0.0`. + /// /// # Examples /// /// ``` diff --git a/library/std/src/f64/soft.rs b/library/std/src/f64/soft.rs new file mode 100644 index 0000000000000..2106a25aaa2c7 --- /dev/null +++ b/library/std/src/f64/soft.rs @@ -0,0 +1,352 @@ +//! Software floating point functions for `f64`. + +#[cfg(test)] +mod tests; + +/// The floating point type. +type FP = f64; + +/// Type of the exponent. +type Exponent = i32; + +/// Type of `FP::to_bits`. +type Bits = u64; + +/// Half as many bits as `Bits`. +type HalfBits = u32; + +/// Twice as many bits as `Bits`. +type DoubleBits = u128; + +/// Number of bits in the significand. +const SIGNIFICAND_BITS: u32 = FP::MANTISSA_DIGITS - 1; + +/// Number of bits in the exponent. +const EXPONENT_BITS: u32 = Bits::BITS - 1 - SIGNIFICAND_BITS; + +/// Encoded exponent = EXPONENT_BIAS + actual exponent. +const EXPONENT_BIAS: Exponent = 2 - FP::MIN_EXP; + +/// Represents an `FP` number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct Representation { + /// Sign. + sign: Sign, + /// Absolute value. + abs: Positive, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Sign { + Positive, + Negative, +} + +/// Represents a positive number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum Positive { + Zero, + Finite(PositiveFinite), + Infinity, + NaN, +} + +/// Represents a non-zero, positive finite number. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct PositiveFinite { + /// `number = 2^exp * mantissa`. + exp: Exponent, + /// `2^SIGNIFICAND_BITS <= mantissa < 2^(SIGNIFICAND_BITS + 1)` + mantissa: Bits, +} + +impl Representation { + fn new(x: FP) -> Self { + let bits = x.to_bits(); + + let sign = if (bits >> (SIGNIFICAND_BITS + EXPONENT_BITS)) & 1 == 0 { + Sign::Positive + } else { + Sign::Negative + }; + + const SIGNIFICAND_MASK: Bits = (1 << SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Bits = (1 << EXPONENT_BITS) - 1; + + let significand = bits & SIGNIFICAND_MASK; + let biased_exponent = bits >> SIGNIFICAND_BITS & EXPONENT_MASK; + + let abs = match biased_exponent { + 0 if significand == 0 => Positive::Zero, + 0 => { + // Subnormal number. + // Normalize it by shifting left, so that it has exactly + // SIGNIFICAND_BITS + 1 bits. + let shift = SIGNIFICAND_BITS + 1 - (Bits::BITS - significand.leading_zeros()); + + Positive::Finite(PositiveFinite { + exp: 1 - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent - shift as Exponent, + mantissa: significand << shift, + }) + } + EXPONENT_MASK if significand == 0 => Positive::Infinity, + EXPONENT_MASK => Positive::NaN, + _ => Positive::Finite(PositiveFinite { + exp: biased_exponent as Exponent - EXPONENT_BIAS - SIGNIFICAND_BITS as Exponent, + mantissa: 1 << SIGNIFICAND_BITS | significand, + }), + }; + Representation { sign, abs } + } +} + +/// Shift right that works even if `shift >= Bits::BITS`. +/// +/// Requires `shift >= 0`. +fn safe_shr(a: Bits, shift: Exponent) -> Bits { + if shift < Bits::BITS as Exponent { a >> shift } else { 0 } +} + +/// Returns `2^exp`. +fn pow2(exp: Exponent) -> FP { + let biased_exponent = exp + EXPONENT_BIAS; + if biased_exponent <= -(SIGNIFICAND_BITS as Exponent) { + // Round to 0. + 0.0 + } else if biased_exponent <= 0 { + // Subnormal. + FP::from_bits(1 << (biased_exponent + (SIGNIFICAND_BITS as Exponent) - 1)) + } else if biased_exponent <= (1 << EXPONENT_BITS) - 1 { + // Normal. + FP::from_bits((biased_exponent as Bits) << SIGNIFICAND_BITS) + } else { + // Round to infinity. + FP::INFINITY + } +} + +/// Euclidean division. +#[allow(dead_code)] +pub(crate) fn div_euclid(a: FP, b: FP) -> FP { + let a = Representation::new(a); + let b = Representation::new(b); + let res = match a.sign { + Sign::Positive => div_floor_pos(a.abs, b.abs), + Sign::Negative => -div_ceil_pos(a.abs, b.abs), + }; + match b.sign { + Sign::Positive => res, + Sign::Negative => -res, + } +} + +/// Division rounded down for positive numbers. +fn div_floor_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_floor_pos_finite(a, b), + (Positive::Finite(_), Positive::Infinity) => 0.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded up for positive numbers. +fn div_ceil_pos(a: Positive, b: Positive) -> FP { + match (a, b) { + (Positive::NaN, _) => FP::NAN, + (_, Positive::NaN) => FP::NAN, + (Positive::Zero, Positive::Zero) => FP::NAN, + (Positive::Zero, Positive::Finite(_)) => 0.0, + (Positive::Zero, Positive::Infinity) => 0.0, + (Positive::Finite(_), Positive::Zero) => FP::INFINITY, + (Positive::Finite(a), Positive::Finite(b)) => div_ceil_pos_finite(a, b), + // Tricky case. It's 1.0 rather than 0.0. This way + // `div_euclid(-finite, inf) = -1.0` is consistent with + // `rem_euclid(-finite, inf) = inf`. + (Positive::Finite(_), Positive::Infinity) => 1.0, + (Positive::Infinity, Positive::Zero) => FP::INFINITY, + (Positive::Infinity, Positive::Finite(_)) => FP::INFINITY, + (Positive::Infinity, Positive::Infinity) => FP::NAN, + } +} + +/// Division rounded down for positive finite numbers. +fn div_floor_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 0.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 <= HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS <= HalfBits::BITS + // SIGNIFICAND_BITS >= HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS >= HalfBits::BITS); + let q = (aa / b.mantissa) as HalfBits; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + let q = (aa / bb) as Bits; + q as FP + } else if exp <= FP::MAX_EXP { + // exp >= Bits::BITS + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + + // e > 0 + // The result is `floor((aa << e) / bb)` + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q` fits in `Bits` because `a.mantissa < 2 * b.mantissa`. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = floor((aa << e) / bb) = (q << e) + floor((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are two cases: + // 1. floor((r << e) / bb) = 0 + // 2. 0 < floor((r << e) / bb) < 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // Case 1 happens when: + // (r << e) / bb < 1 + // (r << e) <= bb - 1 + // r <= ((bb - 1) >> e) + let case_1_bound = safe_shr(b.mantissa - 1, e); + let q_adj = if r <= case_1_bound { + // Case 1. + q + } else { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + }; + q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} + +/// Division rounded up for positive finite numbers. +fn div_ceil_pos_finite(a: PositiveFinite, b: PositiveFinite) -> FP { + let exp = a.exp - b.exp; + if exp < 0 { + 1.0 + } else if exp <= (Bits::BITS - SIGNIFICAND_BITS - 1) as Exponent { + // `aa` fits in `Bits` + let aa = a.mantissa << exp; + // a.mantissa / b.mantissa < 2, hence `q` fits in `HalfBits` as long as: + // exp + 1 < HalfBits::BITS + // Bits::BITS - SIGNIFICAND_BITS < HalfBits::BITS + // SIGNIFICAND_BITS > HalfBits::BITS + const _: () = assert!(SIGNIFICAND_BITS > HalfBits::BITS); + let q = ((aa - 1) / b.mantissa) as HalfBits + 1; + q.into() + } else if exp <= (Bits::BITS - 1) as Exponent { + let aa = DoubleBits::from(a.mantissa) << exp; + let bb = DoubleBits::from(b.mantissa); + // q <= aa / bb + 1 <= (2 - 2^-SIGNIFICAND_BITS) * 2^(Bits::BITS-1) + 1 + // <= 2^Bits::BITS - 2^(Bits::BITS - SIGNIFICAND_BITS) + 1 <= Bits::MAX + let q = ((aa - 1) / bb) as Bits + 1; + q as FP + } else if exp <= FP::MAX_EXP { + let aa = DoubleBits::from(a.mantissa) << (Bits::BITS - 1); + let bb = DoubleBits::from(b.mantissa); + // e > 0 + // The result is ceil((aa << e) / b). + let e = exp - (Bits::BITS - 1) as Exponent; + + // aa = q * bb + r + // `q < Bits::MAX` as in the previous case. + // `q > Bits::MAX / 4` because `b.mantissa < 2 * a.mantissa`. + let q = (aa / bb) as Bits; + // 0 <= r < b + let r = (aa % bb) as Bits; + + // result = ceil((aa << e) / bb) = (q << e) + ceil((r << e) / bb) + // 0 <= (r << e) / bb < 2^e + // + // There are three cases: + // 1. ceil((r << e) / bb) = 0 + // 2. 0 < ceil((r << e) / bb) < 2^e + // 3. ceil((r << e) / bb) = 2^e + // + // Case 1: + // The result is q << e. + // + // Case 2: + // The result is (q << e) + non-zero low e bits. + // + // Rounding beyond the SIGNIFICAND_BITS + 2 most significant bits of q depends + // only on whether the low-order bits are non-zero. Since q > Bits::MAX / 4, + // q.leading_zeros() <= 1. Therefore, beyond the top SIGNIFICAND_BITS + 3 bits + // of q, it doesn't matter *which* bits are non-zero. As long as: + // + // SIGNIFICAND_BITS <= Bits::BITS - 4 + // + // we can just set bit 0 of q to 1 instead of using extra low-order bits. + // + // Therefore the result rounds the same way as (q | 1) << e. + // + // In case 3: + // The result is (q + 1) << e. + // + // Case 1 happens when r = 0. + // Case 3 happens when: + // (r << e) / bb > (1 << e) - 1 + // (r << e) > (bb << e) - b + // ((bb - r) << e) <= bb - 1 + // bb - r <= (bb - 1) >> e + // r >= bb - ((bb - 1) >> e) + let case_3_bound = b.mantissa - safe_shr(b.mantissa - 1, e); + let q_adj = if r == 0 { + // Case 1. + q + } else if r < case_3_bound { + // Case 2. + const _: () = assert!(SIGNIFICAND_BITS + 4 <= Bits::BITS); + q | 1 + } else { + // Case 3. + // No overflow because `q < Bits::MAX`. + q + 1 + }; + q_adj as FP * pow2(e) + } else { + // a.mantissa / b.mantissa >= 1/2, hence + // a / b >= 1/2 * 2^(MAX_EXP+1) = 2^MAX_EXP + FP::INFINITY + } +} diff --git a/library/std/src/f64/soft/tests.rs b/library/std/src/f64/soft/tests.rs new file mode 100644 index 0000000000000..c74b2bbebaadb --- /dev/null +++ b/library/std/src/f64/soft/tests.rs @@ -0,0 +1,49 @@ +use crate::f64::soft::{Positive, PositiveFinite, Representation, Sign, pow2}; + +#[test] +fn test_representation() { + assert_eq!(Representation::new(-1.5f64), Representation { + sign: Sign::Negative, + abs: Positive::Finite(PositiveFinite { exp: -52, mantissa: 3 << 51 }), + }); + + assert_eq!(Representation::new(f64::MIN_POSITIVE), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -1074, mantissa: 1 << 52 }), + }); + + assert_eq!(Representation::new(f64::MIN_POSITIVE / 2.0), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: -1075, mantissa: 1 << 52 }), + }); + + assert_eq!(Representation::new(f64::MAX), Representation { + sign: Sign::Positive, + abs: Positive::Finite(PositiveFinite { exp: 971, mantissa: (1 << 53) - 1 }), + }); + + assert_eq!(Representation::new(-0.0f64), Representation { + sign: Sign::Negative, + abs: Positive::Zero, + }); + + assert_eq!(Representation::new(f64::INFINITY), Representation { + sign: Sign::Positive, + abs: Positive::Infinity, + }); + + assert_eq!(Representation::new(f64::NAN), Representation { + sign: Sign::Positive, + abs: Positive::NaN, + }); +} + +#[test] +fn test_pow2() { + assert_eq!(pow2(2), 4.0); + assert_eq!(pow2(-1), 0.5); + assert_eq!(pow2(-1075), 0.0); + assert_eq!(pow2(-1022), f64::MIN_POSITIVE); + assert_eq!(pow2(-1023), f64::MIN_POSITIVE / 2.0); + assert_eq!(pow2(1024), f64::INFINITY); +} diff --git a/library/std/src/f64/tests.rs b/library/std/src/f64/tests.rs index 3fac2efe0d76c..b6147326818ea 100644 --- a/library/std/src/f64/tests.rs +++ b/library/std/src/f64/tests.rs @@ -411,6 +411,132 @@ fn test_mul_add() { assert_eq!((-3.2f64).mul_add(2.4, neg_inf), neg_inf); } +#[test] +fn test_div_euclid() { + use core::cmp::Ordering; + + let nan = f64::NAN; + let inf = f64::INFINITY; + let largest_subnormal = f64::from_bits(LARGEST_SUBNORMAL_BITS); + + // Infinity and NaN. + assert!(nan.div_euclid(5.0).is_nan()); + assert!(inf.div_euclid(inf).is_nan()); + assert!(0.0f64.div_euclid(0.0).is_nan()); + + assert_eq!(inf.div_euclid(3.0), inf); + assert_eq!(inf.div_euclid(0.0), inf); + assert_eq!(5.0f64.div_euclid(0.0), inf); + assert_eq!((-5.0f64).div_euclid(0.0), -inf); + + // Small / infinity. + assert_eq!(Ordering::Equal, 5.0f64.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, 0.0f64.div_euclid(inf).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(inf).total_cmp(&-0.0)); + assert_eq!((-5.0f64).div_euclid(inf), -1.0); + assert_eq!(Ordering::Equal, 5.0f64.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, 0.0f64.div_euclid(-inf).total_cmp(&-0.0)); + assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(-inf).total_cmp(&0.0)); + assert_eq!((-5.0f64).div_euclid(-inf), 1.0); + + // 0 / x + assert_eq!(Ordering::Equal, 0.0f64.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!(Ordering::Equal, (-0.0f64).div_euclid(10.0).total_cmp(&-0.0)); + + // small / large + assert_eq!(Ordering::Equal, 1.0f64.div_euclid(10.0).total_cmp(&0.0)); + assert_eq!((-1.0f64).div_euclid(10.0), -1.0); + assert_eq!(Ordering::Equal, 1.0f64.div_euclid(-10.0).total_cmp(&-0.0)); + assert_eq!((-1.0f64).div_euclid(-10.0), 1.0); + + // Small results. + assert_eq!(20.0f64.div_euclid(10.0), 2.0); + assert_eq!((-20.0f64).div_euclid(10.0), -2.0); + assert_eq!(20.0f64.div_euclid(-10.0), -2.0); + assert_eq!((-20.0f64).div_euclid(-10.0), 2.0); + + assert_eq!(23.0f64.div_euclid(10.0), 2.0); + assert_eq!((-23.0f64).div_euclid(10.0), -3.0); + assert_eq!(23.0f64.div_euclid(-10.0), -2.0); + assert_eq!((-23.0f64).div_euclid(-10.0), 3.0); + + assert_eq!((2.0 * largest_subnormal).div_euclid(largest_subnormal), 2.0); + + // Issue #107904 + let a = -0.7500000000000006f64; + let b = 0.0833333333333334f64; + assert_eq!(a.div_euclid(b), -9.0); + + let a = 46.11764705882354f64; + let b = 1.6470588235294124f64; + assert_eq!(a.div_euclid(b), 27.0); + + // Medium results. + assert_eq!(1000000.0f64.div_euclid(10.0), 100000.0); + assert_eq!((-1000000.0f64).div_euclid(10.0), -100000.0); + assert_eq!(1000000.0f64.div_euclid(-10.0), -100000.0); + assert_eq!((-1000000.0f64).div_euclid(-10.0), 100000.0); + + assert_eq!(1000003.0f64.div_euclid(10.0), 100000.0); + assert_eq!((-1000003.0f64).div_euclid(10.0), -100001.0); + assert_eq!(1000003.0f64.div_euclid(-10.0), -100000.0); + assert_eq!((-1000003.0f64).div_euclid(-10.0), 100001.0); + + // Large results, exactly divisible. + assert_eq!(1e300f64.div_euclid(2.0), 0.5e300); + assert_eq!((-1e300f64).div_euclid(2.0), -0.5e300); + assert_eq!(1e300f64.div_euclid(-2.0), -0.5e300); + assert_eq!((-1e300f64).div_euclid(-2.0), 0.5e300); + + // Infinite results. + assert_eq!(1e300f64.div_euclid(1e-300), inf); + assert_eq!((-1e300f64).div_euclid(1e-300), -inf); + assert_eq!(1e300f64.div_euclid(-1e-300), -inf); + assert_eq!((-1e300f64).div_euclid(-1e-300), inf); + + // Large results, not divisible. + let a = 3802951800684688204490109616128f64; // 3 * 2^100 + let b = 7f64; + // a / b = 0x6db6db6db6db6db6db6db6db6.db... + // = 543278828669241172070015659446.85... + // floor(a / b) = 0x6db6db6db6db6db6db6db6db6 + // ceil(a / b) = 0x6db6db6db6db6db6db6db6db7 + // Both round to 0x6db6db6db6db6c00000000000 + // = 543278828669241141911982440448 + assert_eq!(a.div_euclid(b), 543278828669241141911982440448f64); + assert_eq!((-a).div_euclid(-b), 543278828669241141911982440448f64); + + // Large results, test precise rounding edge cases. + + // a = 0xc3b75b9c77bfa80000000000000000 = 1016216826558977217020954221979107328 + // b = 0x130594ed7aaacb = 5354161755040459 + let a = 1016216826558977217020954221979107328f64; + let b = 5354161755040459f64; + + // a / b = 0xa49ff0421f8f94000.0a... = 189799425764882989056.03... + // floor(a / b) = 0xa49ff0421f8f94000 + // Rounds down to 0xa49ff0421f8f90000 = 189799425764882972672 + assert_eq!(a.div_euclid(b), 189799425764882972672f64); + + // ceil(a / b) = 0xa49ff0421f8f94001 + // Rounds up to 0xa49ff0421f8f98000 = 189799425764883005440 + assert_eq!((-a).div_euclid(-b), 189799425764883005440f64); + + // a = 0xee67640bd3b6880000000000000000 = 1237863666996996959508242859395383296 + // b = 0x15409d10e91300 = 5982017848677120 + // a / b = 0xb37bdd747bac8bfff.5b... = 206930787956565786623.35... + let a = 1237863666996996959508242859395383296f64; + let b = 5982017848677120f64; + + // floor(a / b) = 0xb37bdd747bac8bfff + // Rounds down to 0xb37bdd747bac88000 = 206930787956565770240 + assert_eq!(a.div_euclid(b), 206930787956565770240f64); + + // ceil(a / b) = 0xb37bdd747bac8c000 + // Rounds up to 0xb37bdd747bac90000 = + assert_eq!((-a).div_euclid(-b), 206930787956565803008f64); +} + #[test] fn test_recip() { let nan: f64 = f64::NAN; diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 49a0322003905..df5d6f6cd3930 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -321,6 +321,7 @@ // Library features (core): // tidy-alphabetical-start #![feature(array_chunks)] +#![feature(bigint_helper_methods)] #![feature(c_str_module)] #![feature(char_internals)] #![feature(clone_to_uninit)] @@ -676,6 +677,7 @@ pub mod alloc; // Private support modules mod panicking; +mod u256; #[path = "../../backtrace/src/lib.rs"] #[allow(dead_code, unused_attributes, fuzzy_provenance_casts, unsafe_op_in_unsafe_fn)] diff --git a/library/std/src/u256.rs b/library/std/src/u256.rs new file mode 100644 index 0000000000000..7094753257dc6 --- /dev/null +++ b/library/std/src/u256.rs @@ -0,0 +1,117 @@ +//! Limited unsigned 256-bit arithmetic for internal use. + +#[cfg(test)] +mod tests; + +use core::ops::{BitOr, BitOrAssign, Div, DivAssign, Shl, ShlAssign, Sub, SubAssign}; + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub(crate) struct U256 { + high: u128, + low: u128, +} + +impl U256 { + pub(crate) const ZERO: Self = Self { high: 0, low: 0 }; + + pub(crate) const ONE: Self = Self { high: 0, low: 1 }; + + /// The number of leading 0 bits. + pub(crate) fn leading_zeros(self) -> u32 { + if self.high != 0 { self.high.leading_zeros() } else { 128 + self.low.leading_zeros() } + } + + /// Returns (quotient, remainder). + pub(crate) fn div_rem(mut self, rhs: Self) -> (Self, Self) { + assert!(rhs != Self::ZERO); + let shift_limit = (rhs.leading_zeros() + 1).saturating_sub(self.leading_zeros()); + let mut q = Self::ZERO; + for shift in (0..shift_limit).rev() { + let rhs_shifted = rhs << shift; + if self >= rhs_shifted { + q |= Self::ONE << shift; + self -= rhs_shifted; + } + } + (q, self) + } + + /// Wrap to `u128`. + pub(crate) fn wrap_u128(self) -> u128 { + self.low + } +} + +impl From for U256 { + fn from(value: u128) -> Self { + Self { high: 0, low: value } + } +} + +impl Shl for U256 { + type Output = Self; + + fn shl(self, rhs: u32) -> Self { + if rhs == 0 { + self + } else if rhs < 128 { + Self { high: self.high << rhs | self.low >> (128 - rhs), low: self.low << rhs } + } else if rhs == 128 { + Self { high: self.low, low: 0 } + } else if rhs < 256 { + Self { high: self.low << (rhs - 128), low: 0 } + } else { + Self::ZERO + } + } +} + +impl ShlAssign for U256 { + fn shl_assign(&mut self, rhs: u32) { + *self = *self << rhs; + } +} + +impl Sub for U256 { + type Output = Self; + + fn sub(self, rhs: U256) -> Self { + let (low, borrow) = self.low.overflowing_sub(rhs.low); + let (high, _) = self.high.borrowing_sub(rhs.high, borrow); + Self { low, high } + } +} + +impl SubAssign for U256 { + fn sub_assign(&mut self, rhs: U256) { + *self = *self - rhs; + } +} + +impl Div for U256 { + type Output = Self; + + fn div(self, rhs: U256) -> Self { + self.div_rem(rhs).0 + } +} + +impl DivAssign for U256 { + fn div_assign(&mut self, rhs: U256) { + *self = *self / rhs; + } +} + +impl BitOr for U256 { + type Output = Self; + + fn bitor(self, rhs: U256) -> Self { + Self { high: self.high | rhs.high, low: self.low | rhs.low } + } +} + +impl BitOrAssign for U256 { + fn bitor_assign(&mut self, rhs: U256) { + *self = *self | rhs; + } +} diff --git a/library/std/src/u256/tests.rs b/library/std/src/u256/tests.rs new file mode 100644 index 0000000000000..0466b519d6f5d --- /dev/null +++ b/library/std/src/u256/tests.rs @@ -0,0 +1,66 @@ +use crate::u256::U256; + +#[test] +fn test_leading_zeros() { + assert_eq!(U256 { high: 0, low: 0 }.leading_zeros(), 256); + assert_eq!(U256 { high: 0xfff, low: 0xf }.leading_zeros(), 116); + assert_eq!(U256 { high: 0, low: 0xf }.leading_zeros(), 252); +} + +#[test] +fn test_wrap_u128() { + assert_eq!(U256 { high: 3, low: 4 }.wrap_u128(), 4); +} + +#[test] +fn test_from_u128() { + assert_eq!(U256::from(7u128), U256 { high: 0, low: 7 }); +} + +#[test] +fn test_shl() { + let a = U256 { high: 0x1234, low: 0x0123456789abcdef0123456789abcdef }; + + assert_eq!(a << 0, a); + assert_eq!(a << 16, U256 { high: 0x12340123, low: 0x456789abcdef0123456789abcdef0000 }); + assert_eq!(a << 128, U256 { high: 0x0123456789abcdef0123456789abcdef, low: 0 }); + assert_eq!(a << 140, U256 { high: 0x3456789abcdef0123456789abcdef000, low: 0 }); + assert_eq!(a << 256, U256::ZERO); + assert_eq!(a << 1000, U256::ZERO); +} + +#[test] +fn test_sub() { + let a = U256 { high: 7, low: 10 }; + let b = U256 { high: 3, low: 2 }; + let c = U256 { high: 3, low: 15 }; + assert_eq!(a - b, U256 { high: 4, low: 8 }); + assert_eq!(a - c, U256 { high: 3, low: u128::MAX - 4 }); +} + +#[test] +fn test_div_rem() { + let a = U256 { high: 0xabcdef, low: 0x0123456789abcdef0123456789abcdef }; + let b = U256 { high: 0, low: 0x987 }; + let c = U256 { high: 0x13, low: 0x9876543210abc }; + assert_eq!( + a.div_rem(b), + (U256 { high: 0x1208, low: 0x63d18447855b62f52e9a6983a9e22813 }, U256 { + high: 0, + low: 0xea + }) + ); + assert_eq!( + a.div_rem(c), + (U256 { high: 0, low: 0x90ad6 }, U256 { + high: 0xd, + low: 0x0123456789abcd98d752c5f8c1057cc7 + },) + ); + assert_eq!(c.div_rem(a), (U256::ZERO, c)); +} + +#[test] +fn test_div() { + assert_eq!(U256::from(47u128) / U256::from(10u128), U256::from(4u128)); +}