|
8 | 8 | // option. This file may not be copied, modified, or distributed
|
9 | 9 | // except according to those terms.
|
10 | 10 |
|
11 |
| -#[feature(managed_boxes)]; |
12 |
| - |
13 | 11 | /* -*- mode: rust; indent-tabs-mode: nil -*-
|
14 | 12 | * Implementation of 'fasta' benchmark from
|
15 | 13 | * Computer Language Benchmarks Game
|
16 | 14 | * http://shootout.alioth.debian.org/
|
17 | 15 | */
|
18 |
| -extern mod extra; |
19 | 16 |
|
20 |
| -use std::int; |
21 | 17 | use std::io;
|
| 18 | +use std::io::buffered::BufferedWriter; |
22 | 19 | use std::io::File;
|
| 20 | +use std::num::min; |
23 | 21 | use std::os;
|
24 |
| -use std::rand::Rng; |
25 |
| -use std::rand; |
26 |
| -use std::str; |
27 | 22 |
|
28 |
| -static LINE_LENGTH: uint = 60u; |
| 23 | +static LINE_LENGTH: uint = 60; |
| 24 | +static IM: u32 = 139968; |
29 | 25 |
|
30 | 26 | struct MyRandom {
|
31 | 27 | last: u32
|
32 | 28 | }
|
33 |
| - |
34 |
| -fn myrandom_next(r: @mut MyRandom, mx: u32) -> u32 { |
35 |
| - r.last = (r.last * 3877u32 + 29573u32) % 139968u32; |
36 |
| - mx * r.last / 139968u32 |
37 |
| -} |
38 |
| - |
39 |
| -#[deriving(Clone)] |
40 |
| -struct AminoAcids { |
41 |
| - ch: char, |
42 |
| - prob: u32 |
43 |
| -} |
44 |
| - |
45 |
| -fn make_cumulative(aa: ~[AminoAcids]) -> ~[AminoAcids] { |
46 |
| - let mut cp: u32 = 0u32; |
47 |
| - let mut ans: ~[AminoAcids] = ~[]; |
48 |
| - for a in aa.iter() { |
49 |
| - cp += a.prob; |
50 |
| - ans.push(AminoAcids {ch: a.ch, prob: cp}); |
| 29 | +impl MyRandom { |
| 30 | + fn new() -> MyRandom { MyRandom { last: 42 } } |
| 31 | + fn normalize(p: f32) -> u32 {(p * IM as f32).floor() as u32} |
| 32 | + fn gen(&mut self) -> u32 { |
| 33 | + self.last = (self.last * 3877 + 29573) % IM; |
| 34 | + self.last |
51 | 35 | }
|
52 |
| - ans |
53 | 36 | }
|
54 | 37 |
|
55 |
| -fn select_random(r: u32, genelist: ~[AminoAcids]) -> char { |
56 |
| - if r < genelist[0].prob { return genelist[0].ch; } |
57 |
| - fn bisect(v: ~[AminoAcids], lo: uint, hi: uint, target: u32) -> char { |
58 |
| - if hi > lo + 1u { |
59 |
| - let mid: uint = lo + (hi - lo) / 2u; |
60 |
| - if target < v[mid].prob { |
61 |
| - return bisect(v, lo, mid, target); |
62 |
| - } else { |
63 |
| - return bisect(v, mid, hi, target); |
64 |
| - } |
65 |
| - } else { |
66 |
| - return v[hi].ch; |
67 |
| - } |
| 38 | +struct AAGen<'a> { |
| 39 | + rng: &'a mut MyRandom, |
| 40 | + data: ~[(u32, u8)] |
| 41 | +} |
| 42 | +impl<'a> AAGen<'a> { |
| 43 | + fn new<'b>(rng: &'b mut MyRandom, aa: &[(char, f32)]) -> AAGen<'b> { |
| 44 | + let mut cum = 0.; |
| 45 | + let data = aa.iter() |
| 46 | + .map(|&(ch, p)| { cum += p; (MyRandom::normalize(cum), ch as u8) }) |
| 47 | + .collect(); |
| 48 | + AAGen { rng: rng, data: data } |
68 | 49 | }
|
69 |
| - bisect(genelist.clone(), 0, genelist.len() - 1, r) |
70 | 50 | }
|
71 |
| - |
72 |
| -fn make_random_fasta(wr: @mut io::Writer, |
73 |
| - id: ~str, |
74 |
| - desc: ~str, |
75 |
| - genelist: ~[AminoAcids], |
76 |
| - n: int) { |
77 |
| - writeln!(wr, ">{} {}", id, desc); |
78 |
| - let mut rng = rand::rng(); |
79 |
| - let rng = @mut MyRandom { |
80 |
| - last: rng.gen() |
81 |
| - }; |
82 |
| - let mut op: ~str = ~""; |
83 |
| - for _ in range(0u, n as uint) { |
84 |
| - op.push_char(select_random(myrandom_next(rng, 100u32), |
85 |
| - genelist.clone())); |
86 |
| - if op.len() >= LINE_LENGTH { |
87 |
| - writeln!(wr, "{}", op); |
88 |
| - op = ~""; |
89 |
| - } |
| 51 | +impl<'a> Iterator<u8> for AAGen<'a> { |
| 52 | + fn next(&mut self) -> Option<u8> { |
| 53 | + let r = self.rng.gen(); |
| 54 | + self.data.iter() |
| 55 | + .skip_while(|pc| pc.n0() < r) |
| 56 | + .map(|&(_, c)| c) |
| 57 | + .next() |
90 | 58 | }
|
91 |
| - if op.len() > 0u { writeln!(wr, "{}", op); } |
92 | 59 | }
|
93 | 60 |
|
94 |
| -fn make_repeat_fasta(wr: @mut io::Writer, id: ~str, desc: ~str, s: ~str, n: int) { |
95 |
| - writeln!(wr, ">{} {}", id, desc); |
96 |
| - let mut op = str::with_capacity( LINE_LENGTH ); |
97 |
| - let sl = s.len(); |
98 |
| - for i in range(0u, n as uint) { |
99 |
| - if (op.len() >= LINE_LENGTH) { |
100 |
| - writeln!(wr, "{}", op); |
101 |
| - op = str::with_capacity( LINE_LENGTH ); |
| 61 | +fn make_fasta<W: Writer, I: Iterator<u8>>( |
| 62 | + wr: &mut W, header: &str, mut it: I, mut n: uint) |
| 63 | +{ |
| 64 | + wr.write(header.as_bytes()); |
| 65 | + let mut line = [0u8, .. LINE_LENGTH + 1]; |
| 66 | + while n > 0 { |
| 67 | + let nb = min(LINE_LENGTH, n); |
| 68 | + for i in range(0, nb) { |
| 69 | + line[i] = it.next().unwrap(); |
102 | 70 | }
|
103 |
| - op.push_char( s[i % sl] as char ); |
| 71 | + n -= nb; |
| 72 | + line[nb] = '\n' as u8; |
| 73 | + wr.write(line.slice_to(nb + 1)); |
104 | 74 | }
|
105 |
| - if op.len() > 0 { |
106 |
| - writeln!(wr, "{}", op); |
107 |
| - } |
108 |
| -} |
109 |
| - |
110 |
| -fn acid(ch: char, prob: u32) -> AminoAcids { |
111 |
| - AminoAcids {ch: ch, prob: prob} |
112 | 75 | }
|
113 | 76 |
|
114 |
| -fn main() { |
| 77 | +fn run<W: Writer>(writer: &mut W) { |
115 | 78 | let args = os::args();
|
116 |
| - let args = if os::getenv("RUST_BENCH").is_some() { |
117 |
| - // alioth tests k-nucleotide with this data at 25,000,000 |
118 |
| - ~[~"", ~"5000000"] |
| 79 | + let n = if os::getenv("RUST_BENCH").is_some() { |
| 80 | + 25000000 |
119 | 81 | } else if args.len() <= 1u {
|
120 |
| - ~[~"", ~"1000"] |
| 82 | + 1000 |
121 | 83 | } else {
|
122 |
| - args |
| 84 | + from_str(args[1]).unwrap() |
123 | 85 | };
|
124 | 86 |
|
125 |
| - let writer = if os::getenv("RUST_BENCH").is_some() { |
126 |
| - let file = File::create(&Path::new("./shootout-fasta.data")); |
127 |
| - @mut file as @mut io::Writer |
128 |
| - } else { |
129 |
| - @mut io::stdout() as @mut io::Writer |
130 |
| - }; |
131 |
| - |
132 |
| - let n = from_str::<int>(args[1]).unwrap(); |
| 87 | + let rng = &mut MyRandom::new(); |
| 88 | + let alu = |
| 89 | + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\ |
| 90 | + GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\ |
| 91 | + CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\ |
| 92 | + ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\ |
| 93 | + GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\ |
| 94 | + AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\ |
| 95 | + AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; |
| 96 | + let iub = &[('a', 0.27), ('c', 0.12), ('g', 0.12), |
| 97 | + ('t', 0.27), ('B', 0.02), ('D', 0.02), |
| 98 | + ('H', 0.02), ('K', 0.02), ('M', 0.02), |
| 99 | + ('N', 0.02), ('R', 0.02), ('S', 0.02), |
| 100 | + ('V', 0.02), ('W', 0.02), ('Y', 0.02)]; |
| 101 | + let homosapiens = &[('a', 0.3029549426680), |
| 102 | + ('c', 0.1979883004921), |
| 103 | + ('g', 0.1975473066391), |
| 104 | + ('t', 0.3015094502008)]; |
| 105 | + |
| 106 | + make_fasta(writer, ">ONE Homo sapiens alu\n", |
| 107 | + alu.as_bytes().iter().cycle().map(|c| *c), n * 2); |
| 108 | + make_fasta(writer, ">TWO IUB ambiguity codes\n", |
| 109 | + AAGen::new(rng, iub), n * 3); |
| 110 | + make_fasta(writer, ">THREE Homo sapiens frequency\n", |
| 111 | + AAGen::new(rng, homosapiens), n * 5); |
| 112 | + |
| 113 | + writer.flush(); |
| 114 | +} |
133 | 115 |
|
134 |
| - let iub: ~[AminoAcids] = |
135 |
| - make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32), |
136 |
| - acid('t', 27u32), acid('B', 2u32), acid('D', 2u32), |
137 |
| - acid('H', 2u32), acid('K', 2u32), acid('M', 2u32), |
138 |
| - acid('N', 2u32), acid('R', 2u32), acid('S', 2u32), |
139 |
| - acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]); |
140 |
| - let homosapiens: ~[AminoAcids] = |
141 |
| - make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32), |
142 |
| - acid('t', 30u32)]); |
143 |
| - let alu: ~str = |
144 |
| - ~"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\ |
145 |
| - GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\ |
146 |
| - CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\ |
147 |
| - ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\ |
148 |
| - GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\ |
149 |
| - AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\ |
150 |
| - AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; |
151 |
| - make_repeat_fasta(writer, ~"ONE", ~"Homo sapiens alu", alu, n * 2); |
152 |
| - make_random_fasta(writer, ~"TWO", ~"IUB ambiguity codes", iub, n * 3); |
153 |
| - make_random_fasta(writer, ~"THREE", |
154 |
| - ~"Homo sapiens frequency", homosapiens, n * 5); |
| 116 | +fn main() { |
| 117 | + if os::getenv("RUST_BENCH").is_some() { |
| 118 | + let mut file = BufferedWriter::new(File::create(&Path::new("./shootout-fasta.data"))); |
| 119 | + run(&mut file); |
| 120 | + } else { |
| 121 | + run(&mut BufferedWriter::new(io::stdout())); |
| 122 | + } |
155 | 123 | }
|
0 commit comments