@@ -87,52 +87,118 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
87
87
type Output = String ;
88
88
89
89
fn concat ( & self ) -> String {
90
- if self . is_empty ( ) {
91
- return String :: new ( ) ;
92
- }
93
-
94
- // `len` calculation may overflow but push_str will check boundaries
95
- let len = self . iter ( ) . map ( |s| s. borrow ( ) . len ( ) ) . sum ( ) ;
96
- let mut result = String :: with_capacity ( len) ;
97
-
98
- for s in self {
99
- result. push_str ( s. borrow ( ) )
100
- }
101
-
102
- result
90
+ self . join ( "" )
103
91
}
104
92
105
93
fn join ( & self , sep : & str ) -> String {
106
- if self . is_empty ( ) {
107
- return String :: new ( ) ;
94
+ unsafe {
95
+ String :: from_utf8_unchecked ( join_generic_copy ( self , sep . as_bytes ( ) ) )
108
96
}
97
+ }
109
98
110
- // concat is faster
111
- if sep . is_empty ( ) {
112
- return self . concat ( ) ;
113
- }
99
+ fn connect ( & self , sep : & str ) -> String {
100
+ self . join ( sep )
101
+ }
102
+ }
114
103
115
- // this is wrong without the guarantee that `self` is non-empty
116
- // `len` calculation may overflow but push_str but will check boundaries
117
- let len = sep. len ( ) * ( self . len ( ) - 1 ) +
118
- self . iter ( ) . map ( |s| s. borrow ( ) . len ( ) ) . sum :: < usize > ( ) ;
119
- let mut result = String :: with_capacity ( len) ;
120
- let mut first = true ;
104
// Copies all of `src` to the front of `*dst`, then shrinks `*dst` to the part that has
// not been written yet.
//
// Panics if `src` is longer than `*dst`; nothing is ever written past the end of `*dst`.
#[inline]
fn copy_slice_and_advance<T: Copy>(dst: &mut &mut [core::mem::MaybeUninit<T>], src: &[T]) {
    let len = src.len();
    // SAFETY: `MaybeUninit<T>` has the same size and alignment as `T`, and every
    // initialized `T` is a valid `MaybeUninit<T>`, so the `len` elements behind `src` may
    // be read through this type for as long as `src` is borrowed.
    let src: &[core::mem::MaybeUninit<T>] =
        unsafe { core::slice::from_raw_parts(src.as_ptr().cast(), len) };
    // Move the slice out of `*dst` so it can be split without reborrowing; the
    // bounds-checked split is what keeps an over-long `src` from writing out of bounds.
    let (head, tail) = core::mem::take(dst).split_at_mut(len);
    head.copy_from_slice(src);
    *dst = tail;
}

// Writes `separator` followed by the contents of each item of `$iter` into `$target`
// (a `&mut [MaybeUninit<T>]`), and evaluates to the tail of `$target` that was left
// unwritten.
//
// The listed `$num` separator lengths get their own loop with a hardcoded length:
// loops with hardcoded sizes run much faster, so the small separator lengths are
// specialized. A zero-length separator (plain concatenation) and every other length
// have dedicated fallbacks.
//
// Every copy is bounds-checked against what remains of `$target`, so an item whose
// `borrow()` yields more data than `$target` has room for causes a panic instead of an
// out-of-bounds write.
//
// NOTE: the (misspelled) macro name is kept as-is so that any other invocation in this
// file keeps resolving.
macro_rules! spezialize_for_lengths {
    ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{
        let mut target = $target;
        let iter = $iter;
        let sep_bytes = $separator;
        match sep_bytes.len() {
            $(
                $num => {
                    for s in iter {
                        // the separator length is the constant `$num` in this arm
                        copy_slice_and_advance(&mut target, &sep_bytes[..$num]);
                        copy_slice_and_advance(&mut target, s.borrow().as_ref());
                    }
                }
            )*
            0 => {
                // concat, same principle without the separator
                for s in iter {
                    copy_slice_and_advance(&mut target, s.borrow().as_ref());
                }
            }
            _ => {
                // arbitrary non-zero size fallback
                for s in iter {
                    copy_slice_and_advance(&mut target, sep_bytes);
                    copy_slice_and_advance(&mut target, s.borrow().as_ref());
                }
            }
        }
        target
    }};
}

// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec.
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262).
// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
// only user of this function. It is left in place for the time when that is fixed.
//
// The bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>;
// [T] and str both impl AsRef<[T]> for some T
// => s.borrow().as_ref() always yields a slice.
//
// Returns the contents of every element of `slice`, in order, with a copy of `sep`
// between adjacent elements; an empty `slice` yields an empty Vec.
//
// Panics if the total length overflows `usize`, or if an element's `borrow()` returns
// more data during the copy than it reported while the length was being computed.
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
    T: Copy,
    B: AsRef<[T]> + ?Sized,
    S: Borrow<B>,
{
    let mut iter = slice.iter();

    // The first element is the only one that is not preceded by a separator.
    let first = match iter.next() {
        Some(first) => first,
        None => return vec![],
    };

    // Total length of the joined output. `slice` is non-empty here, so `slice.len() - 1`
    // cannot underflow. If the calculation overflows we panic: we would have run out of
    // memory anyway, and the copy below wants the whole buffer allocated up front.
    let reserved_len = sep
        .len()
        .checked_mul(slice.len() - 1)
        .and_then(|n| {
            slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add)
        })
        .expect("attempt to join into collection with len > usize::MAX");

    let mut result = Vec::with_capacity(reserved_len);
    result.extend_from_slice(first.borrow().as_ref());

    // A misbehaving `Borrow` impl may hand out a different slice now than it did for the
    // length calculation, so `pos` is not assumed to be within `reserved_len`.
    let pos = result.len();
    let target_len = reserved_len.saturating_sub(pos);

    let unwritten = {
        // The Vec holds at least `reserved_len` elements of capacity, hence at least
        // `target_len` spare ones; the range is bounds-checked regardless.
        let target = &mut result.spare_capacity_mut()[..target_len];

        // copy separator and slices over, with dedicated loops for small separators
        let rest = spezialize_for_lengths!(sep, target, iter; 1, 2, 3, 4);
        rest.len()
    };

    // SAFETY: the copy fills the spare capacity front to back and only advances past
    // elements it has fully written, so exactly `target_len - unwritten` elements after
    // `pos` are initialized, and `pos + target_len` does not exceed the capacity. Using
    // the written count (not `reserved_len`) keeps uninitialized elements hidden when an
    // element yielded less data than it reported earlier.
    unsafe {
        result.set_len(pos + (target_len - unwritten));
    }
    result
}
137
203
138
204
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
0 commit comments