Skip to content

Commit ef74e50

Browse files
Neutron3529Neutron3529
authored and
Neutron3529
committed
Rearrange the pipeline of pow to gain efficiency
The check of the `exp` parameter seems useless if we execute the while-loop more than once. The original implementation of `pow` function using one more comparison if the `exp==0` and may break the pipeline of the cpu, which may generate a slower code. The performance gap between the old and the new implementation may be small, but IMO, at least the newer one looks more beautiful. --- bench prog: ``` extern crate test; ($a:expr)=>{let time=std::time::Instant::now();{$a;}print!("{:?} ",time.elapsed())}; ($a:expr,$b:literal)=>{let time=std::time::Instant::now();let mut a=0;for _ in 0..$b{a^=$a;}print!("{:?} {} ",time.elapsed(),a)} } pub fn pow_rust(x:i64, mut exp: u32) -> i64 { let mut base = x; let mut acc = 1; while exp > 1 { if (exp & 1) == 1 { acc = acc * base; } exp /= 2; base = base * base; } if exp == 1 { acc = acc * base; } acc } pub fn pow_new(x:i64, mut exp: u32) -> i64 { if exp==0{ 1 }else{ let mut base = x; let mut acc = 1; while exp > 1 { if (exp & 1) == 1 { acc = acc * base; } exp >>= 1; base = base * base; } acc * base } } fn main(){ let a=2i64; let b=1_u32; println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); 
timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); timing!(test::black_box(a).pow(test::black_box(b)),100000000); timing!(pow_new(test::black_box(a),test::black_box(b)),100000000); timing!(pow_rust(test::black_box(a),test::black_box(b)),100000000); println!(); } ``` bench in my laptop: ``` neutron@Neutron:/me/rust$ rc commit.rs rustc commit.rs && ./commit 3.978419716s 0 4.079765171s 0 3.964630622s 0 3.997127013s 0 4.260304804s 0 3.997638211s 0 3.963195544s 0 4.11657718s 0 4.176054164s 0 3.830128579s 0 3.980396122s 0 3.937258567s 0 3.986055948s 0 4.127804162s 0 4.018943411s 0 4.185568857s 0 4.217512517s 0 3.98313603s 0 3.863018225s 0 4.030447988s 0 3.694878237s 0 4.206987927s 0 4.137608047s 0 4.115564664s 0 neutron@Neutron:/me/rust$ rc commit.rs -O rustc commit.rs -O && ./commit 162.111993ms 0 165.107125ms 0 166.26924ms 0 175.20479ms 0 205.062565ms 0 176.278791ms 0 174.408975ms 0 166.526899ms 0 201.857604ms 0 146.190062ms 0 168.592821ms 0 154.61411ms 0 199.678912ms 0 168.411598ms 0 162.129996ms 0 147.420765ms 0 209.759326ms 0 154.807907ms 0 165.507134ms 0 188.476239ms 0 157.351524ms 0 121.320123ms 0 126.401229ms 0 114.86428ms 0 ``` delete an unnecessary semicolon... Sorry for the typo. delete trailing whitespace Sorry, too.. Sorry for the missing... I checked all the implementations, and finally found that there is one function that does not check whether `exp == 0` add extra tests add extra tests. 
finished adding the extra tests to prevent further typo add pow(2) to negative exp add whitespace. add whitespace add whitespace delete extra line
1 parent c714eae commit ef74e50

File tree

3 files changed

+101
-44
lines changed

3 files changed

+101
-44
lines changed

src/libcore/num/mod.rs

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,9 @@ $EndFeature, "
11031103
without modifying the original"]
11041104
#[inline]
11051105
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
1106+
if exp == 0 {
1107+
return Some(1);
1108+
}
11061109
let mut base = self;
11071110
let mut acc: Self = 1;
11081111

@@ -1113,15 +1116,11 @@ $EndFeature, "
11131116
exp /= 2;
11141117
base = try_opt!(base.checked_mul(base));
11151118
}
1116-
1119+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
11171120
// Deal with the final bit of the exponent separately, since
11181121
// squaring the base afterwards is not necessary and may cause a
11191122
// needless overflow.
1120-
if exp == 1 {
1121-
acc = try_opt!(acc.checked_mul(base));
1122-
}
1123-
1124-
Some(acc)
1123+
Some(try_opt!(acc.checked_mul(base)))
11251124
}
11261125
}
11271126

@@ -1631,6 +1630,9 @@ $EndFeature, "
16311630
without modifying the original"]
16321631
#[inline]
16331632
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
1633+
if exp == 0 {
1634+
return 1;
1635+
}
16341636
let mut base = self;
16351637
let mut acc: Self = 1;
16361638

@@ -1642,14 +1644,11 @@ $EndFeature, "
16421644
base = base.wrapping_mul(base);
16431645
}
16441646

1647+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
16451648
// Deal with the final bit of the exponent separately, since
16461649
// squaring the base afterwards is not necessary and may cause a
16471650
// needless overflow.
1648-
if exp == 1 {
1649-
acc = acc.wrapping_mul(base);
1650-
}
1651-
1652-
acc
1651+
acc.wrapping_mul(base)
16531652
}
16541653
}
16551654

@@ -1999,6 +1998,9 @@ $EndFeature, "
19991998
without modifying the original"]
20001999
#[inline]
20012000
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
2001+
if exp == 0 {
2002+
return (1,false);
2003+
}
20022004
let mut base = self;
20032005
let mut acc: Self = 1;
20042006
let mut overflown = false;
@@ -2017,16 +2019,13 @@ $EndFeature, "
20172019
overflown |= r.1;
20182020
}
20192021

2022+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
20202023
// Deal with the final bit of the exponent separately, since
20212024
// squaring the base afterwards is not necessary and may cause a
20222025
// needless overflow.
2023-
if exp == 1 {
2024-
r = acc.overflowing_mul(base);
2025-
acc = r.0;
2026-
overflown |= r.1;
2027-
}
2028-
2029-
(acc, overflown)
2026+
r = acc.overflowing_mul(base);
2027+
r.1 |= overflown;
2028+
r
20302029
}
20312030
}
20322031

@@ -2050,6 +2049,9 @@ $EndFeature, "
20502049
#[inline]
20512050
#[rustc_inherit_overflow_checks]
20522051
pub const fn pow(self, mut exp: u32) -> Self {
2052+
if exp == 0 {
2053+
return 1;
2054+
}
20532055
let mut base = self;
20542056
let mut acc = 1;
20552057

@@ -2061,14 +2063,11 @@ $EndFeature, "
20612063
base = base * base;
20622064
}
20632065

2066+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
20642067
// Deal with the final bit of the exponent separately, since
20652068
// squaring the base afterwards is not necessary and may cause a
20662069
// needless overflow.
2067-
if exp == 1 {
2068-
acc = acc * base;
2069-
}
2070-
2071-
acc
2070+
acc * base
20722071
}
20732072
}
20742073

@@ -3306,6 +3305,9 @@ assert_eq!(", stringify!($SelfT), "::MAX.checked_pow(2), None);", $EndFeature, "
33063305
without modifying the original"]
33073306
#[inline]
33083307
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
3308+
if exp == 0 {
3309+
return Some(1);
3310+
}
33093311
let mut base = self;
33103312
let mut acc: Self = 1;
33113313

@@ -3317,14 +3319,12 @@ assert_eq!(", stringify!($SelfT), "::MAX.checked_pow(2), None);", $EndFeature, "
33173319
base = try_opt!(base.checked_mul(base));
33183320
}
33193321

3322+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
33203323
// Deal with the final bit of the exponent separately, since
33213324
// squaring the base afterwards is not necessary and may cause a
33223325
// needless overflow.
3323-
if exp == 1 {
3324-
acc = try_opt!(acc.checked_mul(base));
3325-
}
33263326

3327-
Some(acc)
3327+
Some(try_opt!(acc.checked_mul(base)))
33283328
}
33293329
}
33303330

@@ -3715,6 +3715,9 @@ assert_eq!(3u8.wrapping_pow(6), 217);", $EndFeature, "
37153715
without modifying the original"]
37163716
#[inline]
37173717
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
3718+
if exp == 0 {
3719+
return 1;
3720+
}
37183721
let mut base = self;
37193722
let mut acc: Self = 1;
37203723

@@ -3726,14 +3729,11 @@ assert_eq!(3u8.wrapping_pow(6), 217);", $EndFeature, "
37263729
base = base.wrapping_mul(base);
37273730
}
37283731

3732+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
37293733
// Deal with the final bit of the exponent separately, since
37303734
// squaring the base afterwards is not necessary and may cause a
37313735
// needless overflow.
3732-
if exp == 1 {
3733-
acc = acc.wrapping_mul(base);
3734-
}
3735-
3736-
acc
3736+
acc.wrapping_mul(base)
37373737
}
37383738
}
37393739

@@ -4040,6 +4040,9 @@ assert_eq!(3u8.overflowing_pow(6), (217, true));", $EndFeature, "
40404040
without modifying the original"]
40414041
#[inline]
40424042
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
4043+
if exp == 0{
4044+
return (1,false);
4045+
}
40434046
let mut base = self;
40444047
let mut acc: Self = 1;
40454048
let mut overflown = false;
@@ -4058,16 +4061,14 @@ assert_eq!(3u8.overflowing_pow(6), (217, true));", $EndFeature, "
40584061
overflown |= r.1;
40594062
}
40604063

4064+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
40614065
// Deal with the final bit of the exponent separately, since
40624066
// squaring the base afterwards is not necessary and may cause a
40634067
// needless overflow.
4064-
if exp == 1 {
4065-
r = acc.overflowing_mul(base);
4066-
acc = r.0;
4067-
overflown |= r.1;
4068-
}
4068+
r = acc.overflowing_mul(base);
4069+
r.1 |= overflown;
40694070

4070-
(acc, overflown)
4071+
r
40714072
}
40724073
}
40734074

@@ -4088,6 +4089,9 @@ Basic usage:
40884089
#[inline]
40894090
#[rustc_inherit_overflow_checks]
40904091
pub const fn pow(self, mut exp: u32) -> Self {
4092+
if exp == 0 {
4093+
return 1;
4094+
}
40914095
let mut base = self;
40924096
let mut acc = 1;
40934097

@@ -4099,14 +4103,11 @@ Basic usage:
40994103
base = base * base;
41004104
}
41014105

4106+
// Since exp != 0, exp must be exactly 1 once the loop has finished.
41024107
// Deal with the final bit of the exponent separately, since
41034108
// squaring the base afterwards is not necessary and may cause a
41044109
// needless overflow.
4105-
if exp == 1 {
4106-
acc = acc * base;
4107-
}
4108-
4109-
acc
4110+
acc * base
41104111
}
41114112
}
41124113

src/libcore/tests/num/int_macros.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,12 +255,43 @@ macro_rules! int_module {
255255
#[test]
256256
fn test_pow() {
257257
let mut r = 2 as $T;
258-
259258
assert_eq!(r.pow(2), 4 as $T);
260259
assert_eq!(r.pow(0), 1 as $T);
260+
assert_eq!(r.wrapping_pow(2), 4 as $T);
261+
assert_eq!(r.wrapping_pow(0), 1 as $T);
262+
assert_eq!(r.checked_pow(2), Some(4 as $T));
263+
assert_eq!(r.checked_pow(0), Some(1 as $T));
264+
assert_eq!(r.overflowing_pow(2), (4 as $T, false));
265+
assert_eq!(r.overflowing_pow(0), (1 as $T, false));
266+
assert_eq!(r.saturating_pow(2), 4 as $T);
267+
assert_eq!(r.saturating_pow(0), 1 as $T);
268+
269+
r = MAX;
270+
// use `^` to represent .pow() with no overflow.
271+
// if itest::MAX == 2^j-1, then itest has `j` value bits (plus one sign bit),
272+
// so that `itest::MAX*itest::MAX == 2^(2*j)-2^(j+1)+1`,
273+
// thus the overflowing (wrapped) result is exactly 1.
274+
assert_eq!(r.wrapping_pow(2), 1 as $T);
275+
assert_eq!(r.checked_pow(2), None);
276+
assert_eq!(r.overflowing_pow(2), (1 as $T, true));
277+
assert_eq!(r.saturating_pow(2), MAX);
278+
// test for a negative base.
261279
r = -2 as $T;
262280
assert_eq!(r.pow(2), 4 as $T);
263281
assert_eq!(r.pow(3), -8 as $T);
282+
assert_eq!(r.pow(0), 1 as $T);
283+
assert_eq!(r.wrapping_pow(2), 4 as $T);
284+
assert_eq!(r.wrapping_pow(3), -8 as $T);
285+
assert_eq!(r.wrapping_pow(0), 1 as $T);
286+
assert_eq!(r.checked_pow(2), Some(4 as $T));
287+
assert_eq!(r.checked_pow(3), Some(-8 as $T));
288+
assert_eq!(r.checked_pow(0), Some(1 as $T));
289+
assert_eq!(r.overflowing_pow(2), (4 as $T, false));
290+
assert_eq!(r.overflowing_pow(3), (-8 as $T, false));
291+
assert_eq!(r.overflowing_pow(0), (1 as $T, false));
292+
assert_eq!(r.saturating_pow(2), 4 as $T);
293+
assert_eq!(r.saturating_pow(3), -8 as $T);
294+
assert_eq!(r.saturating_pow(0), 1 as $T);
264295
}
265296
}
266297
};

src/libcore/tests/num/uint_macros.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,31 @@ macro_rules! uint_module {
184184
assert_eq!($T::from_str_radix("Z", 10).ok(), None::<$T>);
185185
assert_eq!($T::from_str_radix("_", 2).ok(), None::<$T>);
186186
}
187+
188+
#[test]
189+
fn test_pow() {
190+
let mut r = 2 as $T;
191+
assert_eq!(r.pow(2), 4 as $T);
192+
assert_eq!(r.pow(0), 1 as $T);
193+
assert_eq!(r.wrapping_pow(2), 4 as $T);
194+
assert_eq!(r.wrapping_pow(0), 1 as $T);
195+
assert_eq!(r.checked_pow(2), Some(4 as $T));
196+
assert_eq!(r.checked_pow(0), Some(1 as $T));
197+
assert_eq!(r.overflowing_pow(2), (4 as $T, false));
198+
assert_eq!(r.overflowing_pow(0), (1 as $T, false));
199+
assert_eq!(r.saturating_pow(2), 4 as $T);
200+
assert_eq!(r.saturating_pow(0), 1 as $T);
201+
202+
r = MAX;
203+
// use `^` to represent .pow() with no overflow.
204+
// if itest::MAX == 2^j-1, then itest is a `j` bit int,
205+
// so that `itest::MAX*itest::MAX == 2^(2*j)-2^(j+1)+1`,
206+
// thus the overflowing (wrapped) result is exactly 1.
207+
assert_eq!(r.wrapping_pow(2), 1 as $T);
208+
assert_eq!(r.checked_pow(2), None);
209+
assert_eq!(r.overflowing_pow(2), (1 as $T, true));
210+
assert_eq!(r.saturating_pow(2), MAX);
211+
}
187212
}
188213
};
189214
}

0 commit comments

Comments
 (0)