Skip to content

Commit e185b04

Browse files
committed
auto merge of #8545 : blake2-ppc/rust/iterbytes, r=alexcrichton
Address issue #5257. For example, these values all had the same hash value: ("aaa", "bbb", "ccc"), ("aaab", "bb", "ccc"), and ("aaabbb", "", "ccc"). IterBytes for &[A] now includes the length before calling iter_bytes on each element. IterBytes for &str is now terminated by a byte (0xFF) that does not appear in UTF-8, so only one more byte is processed when hashing strings.
2 parents 88bd215 + bfa1331 commit e185b04

File tree

3 files changed

+89
-61
lines changed

3 files changed

+89
-61
lines changed

src/libstd/hash.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,14 @@ mod tests {
409409

410410
use uint;
411411

412+
// Hash just the bytes of the slice, without length prefix
413+
struct Bytes<'self>(&'self [u8]);
414+
impl<'self> IterBytes for Bytes<'self> {
415+
fn iter_bytes(&self, _lsb0: bool, f: &fn(&[u8]) -> bool) -> bool {
416+
f(**self)
417+
}
418+
}
419+
412420
#[test]
413421
fn test_siphash() {
414422
let vecs : [[u8, ..8], ..64] = [
@@ -496,7 +504,7 @@ mod tests {
496504
while t < 64 {
497505
debug!("siphash test %?", t);
498506
let vec = u8to64_le!(vecs[t], 0);
499-
let out = buf.hash_keyed(k0, k1);
507+
let out = Bytes(buf.as_slice()).hash_keyed(k0, k1);
500508
debug!("got %?, expected %?", out, vec);
501509
assert_eq!(vec, out);
502510

@@ -587,4 +595,18 @@ mod tests {
587595
fn test_float_hashes_of_zero() {
588596
assert_eq!(0.0.hash(), (-0.0).hash());
589597
}
598+
599+
#[test]
600+
fn test_hash_no_concat_alias() {
601+
let s = ("aa", "bb");
602+
let t = ("aabb", "");
603+
let u = ("a", "abb");
604+
605+
let v = (&[1u8], &[0u8, 0], &[0u8]);
606+
let w = (&[1u8, 0, 0, 0], &[], &[]);
607+
608+
assert!(v != w);
609+
assert!(s.hash() != t.hash() && s.hash() != u.hash());
610+
assert!(v.hash() != w.hash());
611+
}
590612
}

src/libstd/str/ascii.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,6 @@ static ASCII_UPPER_MAP: &'static [u8] = &[
376376
#[cfg(test)]
377377
mod tests {
378378
use super::*;
379-
use to_bytes::ToBytes;
380379
use str::from_char;
381380

382381
macro_rules! v2ascii (
@@ -445,7 +444,6 @@ mod tests {
445444
446445
#[test]
447446
fn test_ascii_to_bytes() {
448-
assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
449447
assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
450448
}
451449

src/libstd/to_bytes.rs

Lines changed: 66 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -15,37 +15,43 @@ The `ToBytes` and `IterBytes` traits
1515
*/
1616

1717
use cast;
18+
use container::Container;
1819
use io;
1920
use io::Writer;
2021
use iterator::Iterator;
2122
use option::{None, Option, Some};
22-
use str::StrSlice;
23-
use vec::ImmutableVector;
23+
use str::{Str, StrSlice};
24+
use vec::{Vector, ImmutableVector};
2425

2526
pub type Cb<'self> = &'self fn(buf: &[u8]) -> bool;
2627

27-
/**
28-
* A trait to implement in order to make a type hashable;
29-
* This works in combination with the trait `Hash::Hash`, and
30-
* may in the future be merged with that trait or otherwise
31-
* modified when default methods and trait inheritance are
32-
* completed.
33-
*/
28+
///
29+
/// A trait to implement in order to make a type hashable;
30+
/// This works in combination with the trait `std::hash::Hash`, and
31+
/// may in the future be merged with that trait or otherwise
32+
/// modified when default methods and trait inheritance are
33+
/// completed.
34+
///
35+
/// IterBytes should be implemented so that the extent of the
36+
/// produced byte stream can be discovered, given the original
37+
/// type.
38+
/// For example, the IterBytes implementation for vectors emits
39+
/// its length first, and enums should emit their discriminant.
40+
///
3441
pub trait IterBytes {
35-
/**
36-
* Call the provided callback `f` one or more times with
37-
* byte-slices that should be used when computing a hash
38-
* value or otherwise "flattening" the structure into
39-
* a sequence of bytes. The `lsb0` parameter conveys
40-
* whether the caller is asking for little-endian bytes
41-
* (`true`) or big-endian (`false`); this should only be
42-
* relevant in implementations that represent a single
43-
* multi-byte datum such as a 32 bit integer or 64 bit
44-
* floating-point value. It can be safely ignored for
45-
* larger structured types as they are usually processed
46-
* left-to-right in declaration order, regardless of
47-
* underlying memory endianness.
48-
*/
42+
/// Call the provided callback `f` one or more times with
43+
/// byte-slices that should be used when computing a hash
44+
/// value or otherwise "flattening" the structure into
45+
/// a sequence of bytes. The `lsb0` parameter conveys
46+
/// whether the caller is asking for little-endian bytes
47+
/// (`true`) or big-endian (`false`); this should only be
48+
/// relevant in implementations that represent a single
49+
/// multi-byte datum such as a 32 bit integer or 64 bit
50+
/// floating-point value. It can be safely ignored for
51+
/// larger structured types as they are usually processed
52+
/// left-to-right in declaration order, regardless of
53+
/// underlying memory endianness.
54+
///
4955
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool;
5056
}
5157

@@ -224,74 +230,76 @@ impl IterBytes for f64 {
224230
impl<'self,A:IterBytes> IterBytes for &'self [A] {
225231
#[inline]
226232
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
233+
self.len().iter_bytes(lsb0, |b| f(b)) &&
227234
self.iter().advance(|elt| elt.iter_bytes(lsb0, |b| f(b)))
228235
}
229236
}
230237

231-
impl<A:IterBytes,B:IterBytes> IterBytes for (A,B) {
232-
#[inline]
233-
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
234-
match *self {
235-
(ref a, ref b) => { a.iter_bytes(lsb0, |b| f(b)) &&
236-
b.iter_bytes(lsb0, |b| f(b)) }
237-
}
238-
}
239-
}
240-
241-
impl<A:IterBytes,B:IterBytes,C:IterBytes> IterBytes for (A,B,C) {
242-
#[inline]
243-
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
244-
match *self {
245-
(ref a, ref b, ref c) => {
246-
a.iter_bytes(lsb0, |b| f(b)) &&
247-
b.iter_bytes(lsb0, |b| f(b)) &&
248-
c.iter_bytes(lsb0, |b| f(b))
249-
}
238+
impl<A: IterBytes> IterBytes for (A, ) {
239+
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
240+
match *self {
241+
(ref a, ) => a.iter_bytes(lsb0, |b| f(b))
242+
}
250243
}
251-
}
252244
}
253245

254-
// Move this to vec, probably.
255-
fn borrow<'x,A>(a: &'x [A]) -> &'x [A] {
256-
a
257-
}
246+
macro_rules! iter_bytes_tuple(
247+
($($A:ident),+) => (
248+
impl<$($A: IterBytes),+> IterBytes for ($($A),+) {
249+
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
250+
match *self {
251+
($(ref $A),+) => {
252+
$(
253+
$A .iter_bytes(lsb0, |b| f(b))
254+
)&&+
255+
}
256+
}
257+
}
258+
}
259+
)
260+
)
261+
262+
iter_bytes_tuple!(A, B)
263+
iter_bytes_tuple!(A, B, C)
264+
iter_bytes_tuple!(A, B, C, D)
265+
iter_bytes_tuple!(A, B, C, D, E)
266+
iter_bytes_tuple!(A, B, C, D, E, F)
267+
iter_bytes_tuple!(A, B, C, D, E, F, G)
268+
iter_bytes_tuple!(A, B, C, D, E, F, G, H)
258269

259270
impl<A:IterBytes> IterBytes for ~[A] {
260271
#[inline]
261272
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
262-
borrow(*self).iter_bytes(lsb0, f)
273+
self.as_slice().iter_bytes(lsb0, f)
263274
}
264275
}
265276

266277
impl<A:IterBytes> IterBytes for @[A] {
267278
#[inline]
268279
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
269-
borrow(*self).iter_bytes(lsb0, f)
280+
self.as_slice().iter_bytes(lsb0, f)
270281
}
271282
}
272283

273284
impl<'self> IterBytes for &'self str {
274285
#[inline]
275286
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
276-
f(self.as_bytes())
287+
// Terminate the string with a byte that does not appear in UTF-8
288+
f(self.as_bytes()) && f([0xFF])
277289
}
278290
}
279291

280292
impl IterBytes for ~str {
281293
#[inline]
282-
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
283-
// this should possibly include the null terminator, but that
284-
// breaks .find_equiv on hashmaps.
285-
f(self.as_bytes())
294+
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
295+
self.as_slice().iter_bytes(lsb0, f)
286296
}
287297
}
288298

289299
impl IterBytes for @str {
290300
#[inline]
291-
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
292-
// this should possibly include the null terminator, but that
293-
// breaks .find_equiv on hashmaps.
294-
f(self.as_bytes())
301+
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
302+
self.as_slice().iter_bytes(lsb0, f)
295303
}
296304
}
297305

0 commit comments

Comments
 (0)