@@ -22,8 +22,7 @@ use char;
22
22
use char:: Char ;
23
23
use clone:: { Clone , DeepClone } ;
24
24
use container:: { Container , Mutable } ;
25
- use num:: Times ;
26
- use iter:: { Iterator , FromIterator , Extendable } ;
25
+ use iter:: { Iterator , FromIterator , Extendable , range} ;
27
26
use iter:: { Filter , AdditiveIterator , Map } ;
28
27
use iter:: { Invert , DoubleEndedIterator , ExactSize } ;
29
28
use libc;
@@ -33,7 +32,6 @@ use ptr;
33
32
use ptr:: RawPtr ;
34
33
use to_str:: ToStr ;
35
34
use uint;
36
- use unstable:: raw:: { Repr , Slice } ;
37
35
use vec;
38
36
use vec:: { OwnedVector , OwnedCopyableVector , ImmutableVector , MutableVector } ;
39
37
use default:: Default ;
@@ -185,23 +183,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
185
183
fn concat ( & self ) -> ~str {
186
184
if self . is_empty ( ) { return ~""; }
187
185
186
+ // `len` calculation may overflow but push_str will check boundaries
188
187
let len = self . iter ( ) . map ( |s| s. as_slice ( ) . len ( ) ) . sum ( ) ;
189
188
190
- let mut s = with_capacity ( len) ;
189
+ let mut result = with_capacity ( len) ;
191
190
192
- unsafe {
193
- do s. as_mut_buf |buf, _| {
194
- let mut buf = buf;
195
- for ss in self . iter ( ) {
196
- do ss. as_slice ( ) . as_imm_buf |ssbuf, sslen| {
197
- ptr:: copy_memory ( buf, ssbuf, sslen) ;
198
- buf = buf. offset ( sslen as int ) ;
199
- }
200
- }
201
- }
202
- raw:: set_len ( & mut s, len) ;
191
+ for s in self . iter ( ) {
192
+ result. push_str ( s. as_slice ( ) )
203
193
}
204
- s
194
+ result
205
195
}
206
196
207
197
/// Concatenate a vector of strings, placing a given separator between each.
@@ -212,34 +202,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
212
202
if sep. is_empty ( ) { return self . concat ( ) ; }
213
203
214
204
// this is wrong without the guarantee that `self` is non-empty
205
+ // `len` calculation may overflow but push_str will check boundaries
215
206
let len = sep. len ( ) * ( self . len ( ) - 1 )
216
207
+ self . iter ( ) . map ( |s| s. as_slice ( ) . len ( ) ) . sum ( ) ;
217
- let mut s = ~"" ;
208
+ let mut result = with_capacity ( len ) ;
218
209
let mut first = true ;
219
210
220
- s. reserve ( len) ;
221
-
222
- unsafe {
223
- do s. as_mut_buf |buf, _| {
224
- do sep. as_imm_buf |sepbuf, seplen| {
225
- let mut buf = buf;
226
- for ss in self . iter ( ) {
227
- do ss. as_slice ( ) . as_imm_buf |ssbuf, sslen| {
228
- if first {
229
- first = false ;
230
- } else {
231
- ptr:: copy_memory ( buf, sepbuf, seplen) ;
232
- buf = buf. offset ( seplen as int ) ;
233
- }
234
- ptr:: copy_memory ( buf, ssbuf, sslen) ;
235
- buf = buf. offset ( sslen as int ) ;
236
- }
237
- }
238
- }
211
+ for s in self . iter ( ) {
212
+ if first {
213
+ first = false ;
214
+ } else {
215
+ result. push_str ( sep) ;
239
216
}
240
- raw :: set_len ( & mut s , len ) ;
217
+ result . push_str ( s . as_slice ( ) ) ;
241
218
}
242
- s
219
+ result
243
220
}
244
221
}
245
222
@@ -961,7 +938,6 @@ static TAG_CONT_U8: u8 = 128u8;
961
938
962
939
/// Unsafe operations
963
940
pub mod raw {
964
- use option:: Some ;
965
941
use cast;
966
942
use libc;
967
943
use ptr;
@@ -1064,21 +1040,22 @@ pub mod raw {
1064
1040
}
1065
1041
}
1066
1042
1067
- /// Appends a byte to a string. (Not UTF-8 safe).
1043
+ /// Appends a byte to a string.
1044
+ /// The caller must preserve the valid UTF-8 property.
1068
1045
#[ inline]
1069
1046
pub unsafe fn push_byte ( s : & mut ~str , b : u8 ) {
1070
- let v: & mut ~[ u8 ] = cast:: transmute ( s) ;
1071
- v. push ( b) ;
1047
+ as_owned_vec ( s) . push ( b)
1072
1048
}
1073
1049
1074
- /// Appends a vector of bytes to a string. (Not UTF-8 safe).
1075
- unsafe fn push_bytes ( s : & mut ~ str , bytes : & [ u8 ] ) {
1076
- let new_len = s . len ( ) + bytes . len ( ) ;
1077
- s . reserve_at_least ( new_len ) ;
1078
- for byte in bytes. iter ( ) { push_byte ( & mut * s , * byte ) ; }
1050
+ /// Appends a vector of bytes to a string.
1051
+ /// The caller must preserve the valid UTF-8 property.
1052
+ # [ inline ]
1053
+ pub unsafe fn push_bytes ( s : & mut ~ str , bytes : & [ u8 ] ) {
1054
+ vec :: bytes:: push_bytes ( as_owned_vec ( s ) , bytes ) ;
1079
1055
}
1080
1056
1081
- /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1057
+ /// Removes the last byte from a string and returns it.
1058
+ /// The caller must preserve the valid UTF-8 property.
1082
1059
pub unsafe fn pop_byte ( s : & mut ~str ) -> u8 {
1083
1060
let len = s. len ( ) ;
1084
1061
assert ! ( ( len > 0 u) ) ;
@@ -1087,7 +1064,8 @@ pub mod raw {
1087
1064
return b;
1088
1065
}
1089
1066
1090
- /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1067
+ /// Removes the first byte from a string and returns it.
1068
+ /// The caller must preserve the valid UTF-8 property.
1091
1069
pub unsafe fn shift_byte ( s : & mut ~str ) -> u8 {
1092
1070
let len = s. len ( ) ;
1093
1071
assert ! ( ( len > 0 u) ) ;
@@ -1096,15 +1074,21 @@ pub mod raw {
1096
1074
return b;
1097
1075
}
1098
1076
1077
+ /// Access the str in its vector representation.
1078
+ /// The caller must preserve the valid UTF-8 property when modifying.
1079
+ #[ inline]
1080
+ pub unsafe fn as_owned_vec < ' a > ( s : & ' a mut ~str ) -> & ' a mut ~[ u8 ] {
1081
+ cast:: transmute ( s)
1082
+ }
1083
+
1099
1084
/// Sets the length of a string
1100
1085
///
1101
1086
/// This will explicitly set the size of the string, without actually
1102
1087
/// modifying its buffers, so it is up to the caller to ensure that
1103
1088
/// the string is actually the specified size.
1104
1089
#[ inline]
1105
1090
pub unsafe fn set_len ( s : & mut ~str , new_len : uint ) {
1106
- let v: & mut ~[ u8 ] = cast:: transmute ( s) ;
1107
- vec:: raw:: set_len ( v, new_len)
1091
+ vec:: raw:: set_len ( as_owned_vec ( s) , new_len)
1108
1092
}
1109
1093
1110
1094
/// Sets the length of a string
@@ -2061,22 +2045,11 @@ impl<'self> StrSlice<'self> for &'self str {
2061
2045
2062
2046
/// Given a string, make a new string with repeated copies of it.
2063
2047
fn repeat(&self, nn: uint) -> ~str {
2064
- do self.as_imm_buf |buf, len| {
2065
- let mut ret = with_capacity(nn * len);
2066
-
2067
- unsafe {
2068
- do ret.as_mut_buf |rbuf, _len| {
2069
- let mut rbuf = rbuf;
2070
-
2071
- do nn.times {
2072
- ptr::copy_memory(rbuf, buf, len);
2073
- rbuf = rbuf.offset(len as int);
2074
- }
2075
- }
2076
- raw::set_len(&mut ret, nn * len);
2077
- }
2078
- ret
2048
+ let mut ret = with_capacity(nn * self.len());
2049
+ for _ in range(0, nn) {
2050
+ ret.push_str(*self);
2079
2051
}
2052
+ ret
2080
2053
}
2081
2054
2082
2055
/// Retrieves the first character from a string slice and returns
@@ -2199,54 +2172,35 @@ impl OwnedStr for ~str {
2199
2172
/// Appends a string slice to the back of a string, without overallocating
2200
2173
#[inline]
2201
2174
fn push_str_no_overallocate(&mut self, rhs: &str) {
2202
- unsafe {
2203
- let llen = self.len();
2204
- let rlen = rhs.len();
2205
- self.reserve(llen + rlen);
2206
- do self.as_imm_buf |lbuf, _llen| {
2207
- do rhs.as_imm_buf |rbuf, _rlen| {
2208
- let dst = ptr::offset(lbuf, llen as int);
2209
- let dst = cast::transmute_mut_unsafe(dst);
2210
- ptr::copy_memory(dst, rbuf, rlen);
2211
- }
2212
- }
2213
- raw::set_len(self, llen + rlen);
2214
- }
2175
+ let new_cap = self.len() + rhs.len();
2176
+ self.reserve(new_cap);
2177
+ self.push_str(rhs);
2215
2178
}
2216
2179
2217
2180
/// Appends a string slice to the back of a string
2218
2181
#[inline]
2219
2182
fn push_str(&mut self, rhs: &str) {
2220
2183
unsafe {
2221
- let llen = self.len();
2222
- let rlen = rhs.len();
2223
- self.reserve_at_least(llen + rlen);
2224
- do self.as_imm_buf |lbuf, _llen| {
2225
- do rhs.as_imm_buf |rbuf, _rlen| {
2226
- let dst = ptr::offset(lbuf, llen as int);
2227
- let dst = cast::transmute_mut_unsafe(dst);
2228
- ptr::copy_memory(dst, rbuf, rlen);
2229
- }
2230
- }
2231
- raw::set_len(self, llen + rlen);
2184
+ raw::push_bytes(self, rhs.as_bytes());
2232
2185
}
2233
2186
}
2234
2187
2235
2188
/// Appends a character to the back of a string
2236
2189
#[inline]
2237
2190
fn push_char(&mut self, c: char) {
2238
2191
let cur_len = self.len();
2239
- self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
2240
-
2241
- // Attempt to not use an intermediate buffer by just pushing bytes
2242
- // directly onto this string.
2192
+ // may use up to 4 bytes.
2243
2193
unsafe {
2244
- let v = self.repr();
2245
- let len = c.encode_utf8(cast::transmute(Slice {
2246
- data: ((&(*v).data) as *u8).offset(cur_len as int),
2247
- len: 4,
2248
- }));
2249
- raw::set_len(self, cur_len + len);
2194
+ raw::as_owned_vec(self).reserve_additional(4);
2195
+
2196
+ // Attempt to not use an intermediate buffer by just pushing bytes
2197
+ // directly onto this string.
2198
+ let used = do self.as_mut_buf |buf, _| {
2199
+ do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
2200
+ c.encode_utf8(slc)
2201
+ }
2202
+ };
2203
+ raw::set_len(self, cur_len + used);
2250
2204
}
2251
2205
}
2252
2206
@@ -2306,8 +2260,7 @@ impl OwnedStr for ~str {
2306
2260
#[inline]
2307
2261
fn reserve(&mut self, n: uint) {
2308
2262
unsafe {
2309
- let v: &mut ~[u8] = cast::transmute(self);
2310
- (*v).reserve(n);
2263
+ raw::as_owned_vec(self).reserve(n)
2311
2264
}
2312
2265
}
2313
2266
@@ -2329,7 +2282,7 @@ impl OwnedStr for ~str {
2329
2282
/// * n - The number of bytes to reserve space for
2330
2283
#[inline]
2331
2284
fn reserve_at_least(&mut self, n: uint) {
2332
- self.reserve(uint::next_power_of_two (n))
2285
+ self.reserve(uint::next_power_of_two_opt(n).unwrap_or (n))
2333
2286
}
2334
2287
2335
2288
/// Returns the number of single-byte characters the string can hold without
@@ -2359,8 +2312,9 @@ impl OwnedStr for ~str {
2359
2312
2360
2313
#[inline]
2361
2314
fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2362
- let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2363
- v.as_mut_buf(f)
2315
+ unsafe {
2316
+ raw::as_owned_vec(self).as_mut_buf(f)
2317
+ }
2364
2318
}
2365
2319
}
2366
2320
@@ -3912,4 +3866,23 @@ mod bench {
3912
3866
with_capacity(100);
3913
3867
}
3914
3868
}
3869
+
3870
+ #[bench]
3871
+ fn bench_push_str(bh: &mut BenchHarness) {
3872
+ let s = " ศไทย中华Việt Nam ; Mary had a little lamb, Little lamb";
3873
+ do bh.iter {
3874
+ let mut r = ~" ";
3875
+ r.push_str(s);
3876
+ }
3877
+ }
3878
+
3879
+ #[bench]
3880
+ fn bench_connect(bh: &mut BenchHarness) {
3881
+ let s = " ศไทย中华Việt Nam ; Mary had a little lamb, Little lamb";
3882
+ let sep = " →";
3883
+ let v = [ s, s, s, s, s, s, s, s, s, s] ;
3884
+ do bh. iter {
3885
+ assert_eq!( v. connect( sep) . len( ) , s. len( ) * 10 + sep. len( ) * 9 ) ;
3886
+ }
3887
+ }
3915
3888
}
0 commit comments