Skip to content

rewrite of shootout-fasta.rs #10933

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 80 additions & 112 deletions src/test/bench/shootout-fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,148 +8,116 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#[feature(managed_boxes)];

/* -*- mode: rust; indent-tabs-mode: nil -*-
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay!

* Implementation of 'fasta' benchmark from
* Computer Language Benchmarks Game
* http://shootout.alioth.debian.org/
*/
extern mod extra;

use std::int;
use std::io;
use std::io::buffered::BufferedWriter;
use std::io::File;
use std::num::min;
use std::os;
use std::rand::Rng;
use std::rand;
use std::str;

static LINE_LENGTH: uint = 60u;
static LINE_LENGTH: uint = 60;
static IM: u32 = 139968;

struct MyRandom {
last: u32
}

fn myrandom_next(r: @mut MyRandom, mx: u32) -> u32 {
r.last = (r.last * 3877u32 + 29573u32) % 139968u32;
mx * r.last / 139968u32
}

#[deriving(Clone)]
struct AminoAcids {
ch: char,
prob: u32
}

fn make_cumulative(aa: ~[AminoAcids]) -> ~[AminoAcids] {
let mut cp: u32 = 0u32;
let mut ans: ~[AminoAcids] = ~[];
for a in aa.iter() {
cp += a.prob;
ans.push(AminoAcids {ch: a.ch, prob: cp});
impl MyRandom {
fn new() -> MyRandom { MyRandom { last: 42 } }
fn normalize(p: f32) -> u32 {(p * IM as f32).floor() as u32}
fn gen(&mut self) -> u32 {
self.last = (self.last * 3877 + 29573) % IM;
self.last
}
ans
}

fn select_random(r: u32, genelist: ~[AminoAcids]) -> char {
if r < genelist[0].prob { return genelist[0].ch; }
fn bisect(v: ~[AminoAcids], lo: uint, hi: uint, target: u32) -> char {
if hi > lo + 1u {
let mid: uint = lo + (hi - lo) / 2u;
if target < v[mid].prob {
return bisect(v, lo, mid, target);
} else {
return bisect(v, mid, hi, target);
}
} else {
return v[hi].ch;
}
struct AAGen<'a> {
rng: &'a mut MyRandom,
data: ~[(u32, u8)]
}
impl<'a> AAGen<'a> {
fn new<'b>(rng: &'b mut MyRandom, aa: &[(char, f32)]) -> AAGen<'b> {
let mut cum = 0.;
let data = aa.iter()
.map(|&(ch, p)| { cum += p; (MyRandom::normalize(cum), ch as u8) })
.collect();
AAGen { rng: rng, data: data }
}
bisect(genelist.clone(), 0, genelist.len() - 1, r)
}

fn make_random_fasta(wr: @mut io::Writer,
id: ~str,
desc: ~str,
genelist: ~[AminoAcids],
n: int) {
writeln!(wr, ">{} {}", id, desc);
let mut rng = rand::rng();
let rng = @mut MyRandom {
last: rng.gen()
};
let mut op: ~str = ~"";
for _ in range(0u, n as uint) {
op.push_char(select_random(myrandom_next(rng, 100u32),
genelist.clone()));
if op.len() >= LINE_LENGTH {
writeln!(wr, "{}", op);
op = ~"";
}
impl<'a> Iterator<u8> for AAGen<'a> {
fn next(&mut self) -> Option<u8> {
let r = self.rng.gen();
self.data.iter()
.skip_while(|pc| pc.n0() < r)
.map(|&(_, c)| c)
.next()
}
if op.len() > 0u { writeln!(wr, "{}", op); }
}

fn make_repeat_fasta(wr: @mut io::Writer, id: ~str, desc: ~str, s: ~str, n: int) {
writeln!(wr, ">{} {}", id, desc);
let mut op = str::with_capacity( LINE_LENGTH );
let sl = s.len();
for i in range(0u, n as uint) {
if (op.len() >= LINE_LENGTH) {
writeln!(wr, "{}", op);
op = str::with_capacity( LINE_LENGTH );
fn make_fasta<W: Writer, I: Iterator<u8>>(
wr: &mut W, header: &str, mut it: I, mut n: uint)
{
wr.write(header.as_bytes());
let mut line = [0u8, .. LINE_LENGTH + 1];
while n > 0 {
let nb = min(LINE_LENGTH, n);
for i in range(0, nb) {
line[i] = it.next().unwrap();
}
op.push_char( s[i % sl] as char );
n -= nb;
line[nb] = '\n' as u8;
wr.write(line.slice_to(nb + 1));
}
if op.len() > 0 {
writeln!(wr, "{}", op);
}
}

fn acid(ch: char, prob: u32) -> AminoAcids {
AminoAcids {ch: ch, prob: prob}
}

fn main() {
fn run<W: Writer>(writer: &mut W) {
let args = os::args();
let args = if os::getenv("RUST_BENCH").is_some() {
// alioth tests k-nucleotide with this data at 25,000,000
~[~"", ~"5000000"]
let n = if os::getenv("RUST_BENCH").is_some() {
25000000
} else if args.len() <= 1u {
~[~"", ~"1000"]
1000
} else {
args
from_str(args[1]).unwrap()
};

let writer = if os::getenv("RUST_BENCH").is_some() {
let file = File::create(&Path::new("./shootout-fasta.data"));
@mut file as @mut io::Writer
} else {
@mut io::stdout() as @mut io::Writer
};

let n = from_str::<int>(args[1]).unwrap();
let rng = &mut MyRandom::new();
let alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
let iub = &[('a', 0.27), ('c', 0.12), ('g', 0.12),
('t', 0.27), ('B', 0.02), ('D', 0.02),
('H', 0.02), ('K', 0.02), ('M', 0.02),
('N', 0.02), ('R', 0.02), ('S', 0.02),
('V', 0.02), ('W', 0.02), ('Y', 0.02)];
let homosapiens = &[('a', 0.3029549426680),
('c', 0.1979883004921),
('g', 0.1975473066391),
('t', 0.3015094502008)];

make_fasta(writer, ">ONE Homo sapiens alu\n",
alu.as_bytes().iter().cycle().map(|c| *c), n * 2);
make_fasta(writer, ">TWO IUB ambiguity codes\n",
AAGen::new(rng, iub), n * 3);
make_fasta(writer, ">THREE Homo sapiens frequency\n",
AAGen::new(rng, homosapiens), n * 5);

writer.flush();
}

let iub: ~[AminoAcids] =
make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
let homosapiens: ~[AminoAcids] =
make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
acid('t', 30u32)]);
let alu: ~str =
~"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
make_repeat_fasta(writer, ~"ONE", ~"Homo sapiens alu", alu, n * 2);
make_random_fasta(writer, ~"TWO", ~"IUB ambiguity codes", iub, n * 3);
make_random_fasta(writer, ~"THREE",
~"Homo sapiens frequency", homosapiens, n * 5);
fn main() {
if os::getenv("RUST_BENCH").is_some() {
let mut file = BufferedWriter::new(File::create(&Path::new("./shootout-fasta.data")));
run(&mut file);
} else {
run(&mut BufferedWriter::new(io::stdout()));
}
}