Skip to content

Remove the need for unwrap when using ProbeSeq #208

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ use test::{black_box, Bencher};

use hashbrown::hash_map::DefaultHashBuilder;
use hashbrown::HashMap;
use std::collections::hash_map::RandomState;
use std::{
collections::hash_map::RandomState,
sync::atomic::{self, AtomicUsize},
};

const SIZE: usize = 1000;

Expand Down Expand Up @@ -40,6 +43,20 @@ impl Iterator for RandomKeys {
}
}

// Just an arbitrary side effect to make the maps not short-circuit to the non-dropping path
// when dropping maps/entries (most real-world usages likely have a Drop impl in the key or
// value type). Global so every benchmark's DropType destructors accumulate into one counter,
// which is printed after each bench to keep the optimizer from eliding the drops.
lazy_static::lazy_static! {
static ref SIDE_EFFECT: AtomicUsize = AtomicUsize::new(0);
}

/// Benchmark value wrapper with a non-trivial destructor: dropping it folds the
/// wrapped payload into the global `SIDE_EFFECT` counter, forcing the map's
/// drop-aware code path instead of the trivial no-drop shortcut.
#[derive(Clone)]
struct DropType(usize);

impl Drop for DropType {
    fn drop(&mut self) {
        // Destructure rather than field-access; SeqCst keeps the side effect
        // observable so the optimizer cannot discard the drop work.
        let DropType(amount) = *self;
        SIDE_EFFECT.fetch_add(amount, atomic::Ordering::SeqCst);
    }
}

macro_rules! bench_suite {
($bench_macro:ident, $bench_ahash_serial:ident, $bench_std_serial:ident,
$bench_ahash_highbits:ident, $bench_std_highbits:ident,
Expand Down Expand Up @@ -69,10 +86,11 @@ macro_rules! bench_insert {
b.iter(|| {
m.clear();
for i in ($keydist).take(SIZE) {
m.insert(i, i);
m.insert(i, DropType(i));
}
black_box(&mut m);
})
});
eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst));
}
};
}
Expand All @@ -93,7 +111,7 @@ macro_rules! bench_insert_erase {
fn $name(b: &mut Bencher) {
let mut base = $maptype::default();
for i in ($keydist).take(SIZE) {
base.insert(i, i);
base.insert(i, DropType(i));
}
let skip = $keydist.skip(SIZE);
b.iter(|| {
Expand All @@ -103,11 +121,12 @@ macro_rules! bench_insert_erase {
// While keeping the size constant,
// replace the first keydist with the second.
for (add, remove) in (&mut add_iter).zip(&mut remove_iter).take(SIZE) {
m.insert(add, add);
m.insert(add, DropType(add));
black_box(m.remove(&remove));
}
black_box(m);
})
});
eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst));
}
};
}
Expand All @@ -128,14 +147,15 @@ macro_rules! bench_lookup {
fn $name(b: &mut Bencher) {
let mut m = $maptype::default();
for i in $keydist.take(SIZE) {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
for i in $keydist.take(SIZE) {
black_box(m.get(&i));
}
})
});
eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst));
}
};
}
Expand All @@ -157,7 +177,7 @@ macro_rules! bench_lookup_fail {
let mut m = $maptype::default();
let mut iter = $keydist;
for i in (&mut iter).take(SIZE) {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
Expand Down Expand Up @@ -185,7 +205,7 @@ macro_rules! bench_iter {
fn $name(b: &mut Bencher) {
let mut m = $maptype::default();
for i in ($keydist).take(SIZE) {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
Expand All @@ -211,7 +231,7 @@ bench_suite!(
fn clone_small(b: &mut Bencher) {
let mut m = HashMap::new();
for i in 0..10 {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
Expand All @@ -224,7 +244,7 @@ fn clone_from_small(b: &mut Bencher) {
let mut m = HashMap::new();
let mut m2 = HashMap::new();
for i in 0..10 {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
Expand All @@ -237,7 +257,7 @@ fn clone_from_small(b: &mut Bencher) {
fn clone_large(b: &mut Bencher) {
let mut m = HashMap::new();
for i in 0..1000 {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
Expand All @@ -250,7 +270,7 @@ fn clone_from_large(b: &mut Bencher) {
let mut m = HashMap::new();
let mut m2 = HashMap::new();
for i in 0..1000 {
m.insert(i, i);
m.insert(i, DropType(i));
}

b.iter(|| {
Expand Down
43 changes: 16 additions & 27 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,27 +145,22 @@ fn h2(hash: u64) -> u8 {
/// Proof that the probe will visit every group in the table:
/// <https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/>
struct ProbeSeq {
bucket_mask: usize,
pos: usize,
stride: usize,
}

impl Iterator for ProbeSeq {
type Item = usize;

impl ProbeSeq {
#[inline]
fn next(&mut self) -> Option<usize> {
fn move_next(&mut self, bucket_mask: usize) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you change this to pass the bucket mask as an argument?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also this method needs #[inline].

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ProbeSeq never changes it, so storing it there duplicates data already held by the table. I figured the field only existed because Iterator::next can't take an argument.

// We should have found an empty bucket by now and ended the probe.
debug_assert!(
self.stride <= self.bucket_mask,
self.stride <= bucket_mask,
"Went past end of probe sequence"
);

let result = self.pos;
self.stride += Group::WIDTH;
self.pos += self.stride;
self.pos &= self.bucket_mask;
Some(result)
self.pos &= bucket_mask;
}
}

Expand Down Expand Up @@ -578,15 +573,14 @@ impl<T> RawTable<T> {
}
}

/// Returns an iterator for a probe sequence on the table.
/// Returns an iterator-like object for a probe sequence on the table.
///
/// This iterator never terminates, but is guaranteed to visit each bucket
/// group exactly once. The loop using `probe_seq` must terminate upon
/// reaching a group containing an empty bucket.
#[cfg_attr(feature = "inline-more", inline)]
fn probe_seq(&self, hash: u64) -> ProbeSeq {
ProbeSeq {
bucket_mask: self.bucket_mask,
pos: h1(hash) & self.bucket_mask,
stride: 0,
}
Expand Down Expand Up @@ -626,11 +620,12 @@ impl<T> RawTable<T> {
/// There must be at least 1 empty bucket in the table.
#[cfg_attr(feature = "inline-more", inline)]
fn find_insert_slot(&self, hash: u64) -> usize {
for pos in self.probe_seq(hash) {
let mut probe_seq = self.probe_seq(hash);
loop {
unsafe {
let group = Group::load(self.ctrl(pos));
let group = Group::load(self.ctrl(probe_seq.pos));
if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() {
let result = (pos + bit) & self.bucket_mask;
let result = (probe_seq.pos + bit) & self.bucket_mask;

// In tables smaller than the group width, trailing control
// bytes outside the range of the table are filled with
Expand All @@ -643,7 +638,7 @@ impl<T> RawTable<T> {
// control bytes (containing EMPTY).
if unlikely(is_full(*self.ctrl(result))) {
debug_assert!(self.bucket_mask < Group::WIDTH);
debug_assert_ne!(pos, 0);
debug_assert_ne!(probe_seq.pos, 0);
return Group::load_aligned(self.ctrl(0))
.match_empty_or_deleted()
.lowest_set_bit_nonzero();
Expand All @@ -652,10 +647,8 @@ impl<T> RawTable<T> {
}
}
}
probe_seq.move_next(self.bucket_mask);
}

// probe_seq never returns.
unreachable!();
}

/// Marks all table buckets as empty without dropping their contents.
Expand Down Expand Up @@ -1872,8 +1865,6 @@ pub struct RawIterHash<'a, T> {
// The sequence of groups to probe in the search.
probe_seq: ProbeSeq,

// The current group and its position.
pos: usize,
group: Group,

// The elements within the group with a matching h2-hash.
Expand All @@ -1884,16 +1875,14 @@ impl<'a, T> RawIterHash<'a, T> {
fn new(table: &'a RawTable<T>, hash: u64) -> Self {
unsafe {
let h2_hash = h2(hash);
let mut probe_seq = table.probe_seq(hash);
let pos = probe_seq.next().unwrap();
let group = Group::load(table.ctrl(pos));
let probe_seq = table.probe_seq(hash);
let group = Group::load(table.ctrl(probe_seq.pos));
let bitmask = group.match_byte(h2_hash).into_iter();

RawIterHash {
table,
h2_hash,
probe_seq,
pos,
group,
bitmask,
}
Expand All @@ -1908,15 +1897,15 @@ impl<'a, T> Iterator for RawIterHash<'a, T> {
unsafe {
loop {
if let Some(bit) = self.bitmask.next() {
let index = (self.pos + bit) & self.table.bucket_mask;
let index = (self.probe_seq.pos + bit) & self.table.bucket_mask;
let bucket = self.table.bucket(index);
return Some(bucket);
}
if likely(self.group.match_empty().any_bit_set()) {
return None;
}
self.pos = self.probe_seq.next().unwrap();
self.group = Group::load(self.table.ctrl(self.pos));
self.probe_seq.move_next(self.table.bucket_mask);
self.group = Group::load(self.table.ctrl(self.probe_seq.pos));
self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
}
}
Expand Down