Skip to content

optimize joining for slices #50340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions src/liballoc/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,17 @@ impl<T: Clone, V: Borrow<[T]>> SliceConcatExt<T> for [V] {
}

fn join(&self, sep: &T) -> Vec<T> {
let mut iter = self.iter();
let first = match iter.next() {
Some(first) => first,
None => return vec![],
};
let size = self.iter().fold(0, |acc, v| acc + v.borrow().len());
let mut result = Vec::with_capacity(size + self.len());
let mut first = true;
for v in self {
if first {
first = false
} else {
result.push(sep.clone())
}
result.extend_from_slice(first.borrow());

for v in iter {
result.push(sep.clone());
result.extend_from_slice(v.borrow())
}
result
Expand Down
126 changes: 91 additions & 35 deletions src/liballoc/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,52 +86,108 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
type Output = String;

fn concat(&self) -> String {
if self.is_empty() {
return String::new();
}

// `len` calculation may overflow but push_str will check boundaries
let len = self.iter().map(|s| s.borrow().len()).sum();
let mut result = String::with_capacity(len);

for s in self {
result.push_str(s.borrow())
}

result
self.join("")
}

fn join(&self, sep: &str) -> String {
if self.is_empty() {
return String::new();
}

// concat is faster
if sep.is_empty() {
return self.concat();
unsafe {
String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) )
}
}

// this is wrong without the guarantee that `self` is non-empty
// `len` calculation may overflow but push_str but will check boundaries
let len = sep.len() * (self.len() - 1) +
self.iter().map(|s| s.borrow().len()).sum::<usize>();
let mut result = String::with_capacity(len);
let mut first = true;
fn connect(&self, sep: &str) -> String {
self.join(sep)
}
}

for s in self {
if first {
first = false;
} else {
result.push_str(sep);
macro_rules! spezialize_for_lengths {
($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {
let mut target = $target;
let iter = $iter;
let sep_bytes = $separator;
match $separator.len() {
$(
// loops with hardcoded sizes run much faster
// specialize the cases with small separator lengths
$num => {
for s in iter {
copy_slice_and_advance!(target, sep_bytes);
copy_slice_and_advance!(target, s.borrow().as_ref());
}
},
)*
_ => {
// arbitrary non-zero size fallback
for s in iter {
copy_slice_and_advance!(target, sep_bytes);
copy_slice_and_advance!(target, s.borrow().as_ref());
}
}
result.push_str(s.borrow());
}
result
};
}

macro_rules! copy_slice_and_advance {
($target:expr, $bytes:expr) => {
let len = $bytes.len();
let (head, tail) = {$target}.split_at_mut(len);
head.copy_from_slice($bytes);
$target = tail;
}
}

fn connect(&self, sep: &str) -> String {
self.join(sep)
// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
// only user of this function. It is left in place for the time when that is fixed.
//
// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
// [T] and str both impl AsRef<[T]> for some T
// => s.borrow().as_ref() and we always have slices
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
T: Copy,
B: AsRef<[T]> + ?Sized,
S: Borrow<B>,
{
let sep_len = sep.len();
let mut iter = slice.iter();

// the first slice is the only one without a separator preceding it
let first = match iter.next() {
Some(first) => first,
None => return vec![],
};

// compute the exact total length of the joined Vec
// if the `len` calculation overflows, we'll panic
// we would have run out of memory anyway and the rest of the function requires
// the entire Vec pre-allocated for safety
let len = sep_len.checked_mul(iter.len()).and_then(|n| {
slice.iter()
.map(|s| s.borrow().as_ref().len())
.try_fold(n, usize::checked_add)
}).expect("attempt to join into collection with len > usize::MAX");

// crucial for safety
let mut result = Vec::with_capacity(len);
assert!(result.capacity() >= len);

result.extend_from_slice(first.borrow().as_ref());

unsafe {
{
let pos = result.len();
let target = result.get_unchecked_mut(pos..len);

// copy separator and slices over without bounds checks
// generate loops with hardcoded offsets for small separators
// massive improvements possible (~ x2)
spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4);
}
result.set_len(len);
}
result
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
9 changes: 9 additions & 0 deletions src/liballoc/tests/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,15 @@ fn test_join() {
assert_eq!(v.join(&0), [1, 0, 2, 0, 3]);
}

#[test]
fn test_join_nocopy() {
let v: [String; 0] = [];
assert_eq!(v.join(","), "");
assert_eq!(["a".to_string(), "ab".into()].join(","), "a,ab");
assert_eq!(["a".to_string(), "ab".into(), "abc".into()].join(","), "a,ab,abc");
assert_eq!(["a".to_string(), "ab".into(), "".into()].join(","), "a,ab,");
}

#[test]
fn test_insert() {
let mut a = vec![1, 2, 4];
Expand Down
13 changes: 13 additions & 0 deletions src/liballoc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,19 @@ fn test_join_for_different_lengths() {
test_join!("-a-bc", ["", "a", "bc"], "-");
}

// join has fast paths for small separators up to 4 bytes
// this tests the slow paths.
#[test]
fn test_join_for_different_lengths_with_long_separator() {
assert_eq!("~~~~~".len(), 15);

let empty: &[&str] = &[];
test_join!("", empty, "~~~~~");
test_join!("a", ["a"], "~~~~~");
test_join!("a~~~~~b", ["a", "b"], "~~~~~");
test_join!("~~~~~a~~~~~bc", ["", "a", "bc"], "~~~~~");
}

#[test]
fn test_unsafe_slice() {
assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
Expand Down