Skip to content

Commit b2750bc

Browse files
committed
auto merge of #10933 : TeXitoi/rust/shootout-fasta-rewrite, r=alexcrichton
Improvements: no managed boxes; no virtual calls; no useless copies; optimizations (bisect is slower, so it was dropped; limited tests; BufferedWriter; ...); passes the shootout test; 10 times faster.
2 parents 00b1adf + 64ca0ba commit b2750bc

File tree

1 file changed

+80
-112
lines changed

1 file changed

+80
-112
lines changed

src/test/bench/shootout-fasta.rs

Lines changed: 80 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -8,148 +8,116 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
#[feature(managed_boxes)];
12-
1311
/* -*- mode: rust; indent-tabs-mode: nil -*-
1412
* Implementation of 'fasta' benchmark from
1513
* Computer Language Benchmarks Game
1614
* http://shootout.alioth.debian.org/
1715
*/
18-
extern mod extra;
1916

20-
use std::int;
2117
use std::io;
18+
use std::io::buffered::BufferedWriter;
2219
use std::io::File;
20+
use std::num::min;
2321
use std::os;
24-
use std::rand::Rng;
25-
use std::rand;
26-
use std::str;
2722

28-
static LINE_LENGTH: uint = 60u;
23+
static LINE_LENGTH: uint = 60;
24+
static IM: u32 = 139968;
2925

3026
struct MyRandom {
3127
last: u32
3228
}
33-
34-
fn myrandom_next(r: @mut MyRandom, mx: u32) -> u32 {
35-
r.last = (r.last * 3877u32 + 29573u32) % 139968u32;
36-
mx * r.last / 139968u32
37-
}
38-
39-
#[deriving(Clone)]
40-
struct AminoAcids {
41-
ch: char,
42-
prob: u32
43-
}
44-
45-
fn make_cumulative(aa: ~[AminoAcids]) -> ~[AminoAcids] {
46-
let mut cp: u32 = 0u32;
47-
let mut ans: ~[AminoAcids] = ~[];
48-
for a in aa.iter() {
49-
cp += a.prob;
50-
ans.push(AminoAcids {ch: a.ch, prob: cp});
29+
impl MyRandom {
30+
fn new() -> MyRandom { MyRandom { last: 42 } }
31+
fn normalize(p: f32) -> u32 {(p * IM as f32).floor() as u32}
32+
fn gen(&mut self) -> u32 {
33+
self.last = (self.last * 3877 + 29573) % IM;
34+
self.last
5135
}
52-
ans
5336
}
5437

55-
fn select_random(r: u32, genelist: ~[AminoAcids]) -> char {
56-
if r < genelist[0].prob { return genelist[0].ch; }
57-
fn bisect(v: ~[AminoAcids], lo: uint, hi: uint, target: u32) -> char {
58-
if hi > lo + 1u {
59-
let mid: uint = lo + (hi - lo) / 2u;
60-
if target < v[mid].prob {
61-
return bisect(v, lo, mid, target);
62-
} else {
63-
return bisect(v, mid, hi, target);
64-
}
65-
} else {
66-
return v[hi].ch;
67-
}
38+
struct AAGen<'a> {
39+
rng: &'a mut MyRandom,
40+
data: ~[(u32, u8)]
41+
}
42+
impl<'a> AAGen<'a> {
43+
fn new<'b>(rng: &'b mut MyRandom, aa: &[(char, f32)]) -> AAGen<'b> {
44+
let mut cum = 0.;
45+
let data = aa.iter()
46+
.map(|&(ch, p)| { cum += p; (MyRandom::normalize(cum), ch as u8) })
47+
.collect();
48+
AAGen { rng: rng, data: data }
6849
}
69-
bisect(genelist.clone(), 0, genelist.len() - 1, r)
7050
}
71-
72-
fn make_random_fasta(wr: @mut io::Writer,
73-
id: ~str,
74-
desc: ~str,
75-
genelist: ~[AminoAcids],
76-
n: int) {
77-
writeln!(wr, ">{} {}", id, desc);
78-
let mut rng = rand::rng();
79-
let rng = @mut MyRandom {
80-
last: rng.gen()
81-
};
82-
let mut op: ~str = ~"";
83-
for _ in range(0u, n as uint) {
84-
op.push_char(select_random(myrandom_next(rng, 100u32),
85-
genelist.clone()));
86-
if op.len() >= LINE_LENGTH {
87-
writeln!(wr, "{}", op);
88-
op = ~"";
89-
}
51+
impl<'a> Iterator<u8> for AAGen<'a> {
52+
fn next(&mut self) -> Option<u8> {
53+
let r = self.rng.gen();
54+
self.data.iter()
55+
.skip_while(|pc| pc.n0() < r)
56+
.map(|&(_, c)| c)
57+
.next()
9058
}
91-
if op.len() > 0u { writeln!(wr, "{}", op); }
9259
}
9360

94-
fn make_repeat_fasta(wr: @mut io::Writer, id: ~str, desc: ~str, s: ~str, n: int) {
95-
writeln!(wr, ">{} {}", id, desc);
96-
let mut op = str::with_capacity( LINE_LENGTH );
97-
let sl = s.len();
98-
for i in range(0u, n as uint) {
99-
if (op.len() >= LINE_LENGTH) {
100-
writeln!(wr, "{}", op);
101-
op = str::with_capacity( LINE_LENGTH );
61+
fn make_fasta<W: Writer, I: Iterator<u8>>(
62+
wr: &mut W, header: &str, mut it: I, mut n: uint)
63+
{
64+
wr.write(header.as_bytes());
65+
let mut line = [0u8, .. LINE_LENGTH + 1];
66+
while n > 0 {
67+
let nb = min(LINE_LENGTH, n);
68+
for i in range(0, nb) {
69+
line[i] = it.next().unwrap();
10270
}
103-
op.push_char( s[i % sl] as char );
71+
n -= nb;
72+
line[nb] = '\n' as u8;
73+
wr.write(line.slice_to(nb + 1));
10474
}
105-
if op.len() > 0 {
106-
writeln!(wr, "{}", op);
107-
}
108-
}
109-
110-
fn acid(ch: char, prob: u32) -> AminoAcids {
111-
AminoAcids {ch: ch, prob: prob}
11275
}
11376

114-
fn main() {
77+
fn run<W: Writer>(writer: &mut W) {
11578
let args = os::args();
116-
let args = if os::getenv("RUST_BENCH").is_some() {
117-
// alioth tests k-nucleotide with this data at 25,000,000
118-
~[~"", ~"5000000"]
79+
let n = if os::getenv("RUST_BENCH").is_some() {
80+
25000000
11981
} else if args.len() <= 1u {
120-
~[~"", ~"1000"]
82+
1000
12183
} else {
122-
args
84+
from_str(args[1]).unwrap()
12385
};
12486

125-
let writer = if os::getenv("RUST_BENCH").is_some() {
126-
let file = File::create(&Path::new("./shootout-fasta.data"));
127-
@mut file as @mut io::Writer
128-
} else {
129-
@mut io::stdout() as @mut io::Writer
130-
};
131-
132-
let n = from_str::<int>(args[1]).unwrap();
87+
let rng = &mut MyRandom::new();
88+
let alu =
89+
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
90+
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
91+
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
92+
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
93+
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
94+
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
95+
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
96+
let iub = &[('a', 0.27), ('c', 0.12), ('g', 0.12),
97+
('t', 0.27), ('B', 0.02), ('D', 0.02),
98+
('H', 0.02), ('K', 0.02), ('M', 0.02),
99+
('N', 0.02), ('R', 0.02), ('S', 0.02),
100+
('V', 0.02), ('W', 0.02), ('Y', 0.02)];
101+
let homosapiens = &[('a', 0.3029549426680),
102+
('c', 0.1979883004921),
103+
('g', 0.1975473066391),
104+
('t', 0.3015094502008)];
105+
106+
make_fasta(writer, ">ONE Homo sapiens alu\n",
107+
alu.as_bytes().iter().cycle().map(|c| *c), n * 2);
108+
make_fasta(writer, ">TWO IUB ambiguity codes\n",
109+
AAGen::new(rng, iub), n * 3);
110+
make_fasta(writer, ">THREE Homo sapiens frequency\n",
111+
AAGen::new(rng, homosapiens), n * 5);
112+
113+
writer.flush();
114+
}
133115

134-
let iub: ~[AminoAcids] =
135-
make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
136-
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
137-
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
138-
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
139-
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
140-
let homosapiens: ~[AminoAcids] =
141-
make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
142-
acid('t', 30u32)]);
143-
let alu: ~str =
144-
~"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
145-
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
146-
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
147-
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
148-
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
149-
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
150-
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
151-
make_repeat_fasta(writer, ~"ONE", ~"Homo sapiens alu", alu, n * 2);
152-
make_random_fasta(writer, ~"TWO", ~"IUB ambiguity codes", iub, n * 3);
153-
make_random_fasta(writer, ~"THREE",
154-
~"Homo sapiens frequency", homosapiens, n * 5);
116+
fn main() {
117+
if os::getenv("RUST_BENCH").is_some() {
118+
let mut file = BufferedWriter::new(File::create(&Path::new("./shootout-fasta.data")));
119+
run(&mut file);
120+
} else {
121+
run(&mut BufferedWriter::new(io::stdout()));
122+
}
155123
}

0 commit comments

Comments
 (0)