Skip to content

Commit d5e9033

Browse files
committed
auto merge of #9108 : blake2-ppc/rust/hazards-on-overflow, r=alexcrichton
Fix uint overflow bugs in std::{at_vec, vec, str} Closes #8742 Fix issue #8742, which summarized is: unsafe code in vec and str did assume that a reservation for `X + Y` elements always succeeded, and didn't overflow. Introduce the method `Vec::reserve_additional(n)` to make it easy to check for overflow in `Vec::push` and `Vec::push_all`. In std::str, simplify and remove a lot of the unsafe code and use `push_str` instead. With improvements to `.push_str` and the new function `vec::bytes::push_bytes`, it looks like this change has either no or positive impact on performance. I believe there are many places still where `v.reserve(A + B)` still can overflow. This by itself is not an issue unless followed by (unsafe) code that steps aside boundary checks.
2 parents 2f96c22 + e211888 commit d5e9033

File tree

5 files changed

+152
-118
lines changed

5 files changed

+152
-118
lines changed

src/libstd/at_vec.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,16 @@ pub mod raw {
230230
// Implementation detail. Shouldn't be public
231231
#[allow(missing_doc)]
232232
pub fn reserve_raw(ty: *TyDesc, ptr: *mut *mut Box<Vec<()>>, n: uint) {
233-
233+
// check for `uint` overflow
234234
unsafe {
235-
let size_in_bytes = n * (*ty).size;
236-
if size_in_bytes > (**ptr).data.alloc {
237-
let total_size = size_in_bytes + sys::size_of::<Vec<()>>();
235+
if n > (**ptr).data.alloc / (*ty).size {
236+
let alloc = n * (*ty).size;
237+
let total_size = alloc + sys::size_of::<Vec<()>>();
238+
if alloc / (*ty).size != n || total_size < alloc {
239+
fail!("vector size is too large: %u", n);
240+
}
238241
(*ptr) = local_realloc(*ptr as *(), total_size) as *mut Box<Vec<()>>;
239-
(**ptr).data.alloc = size_in_bytes;
242+
(**ptr).data.alloc = alloc;
240243
}
241244
}
242245

src/libstd/num/uint.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,17 @@ pub fn next_power_of_two(n: uint) -> uint {
101101
let mut tmp: uint = n - 1u;
102102
let mut shift: uint = 1u;
103103
while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
104-
return tmp + 1u;
104+
tmp + 1u
105+
}
106+
107+
/// Returns the smallest power of 2 greater than or equal to `n`
108+
#[inline]
109+
pub fn next_power_of_two_opt(n: uint) -> Option<uint> {
110+
let halfbits: uint = sys::size_of::<uint>() * 4u;
111+
let mut tmp: uint = n - 1u;
112+
let mut shift: uint = 1u;
113+
while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
114+
tmp.checked_add(&1)
105115
}
106116

107117
#[cfg(target_word_size = "32")]

src/libstd/rt/io/extensions.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ impl<T: Reader> ReaderUtil for T {
303303
let start_len = buf.len();
304304
let mut total_read = 0;
305305

306-
buf.reserve_at_least(start_len + len);
306+
buf.reserve_additional(len);
307307
vec::raw::set_len(buf, start_len + len);
308308

309309
do (|| {

src/libstd/str.rs

+78-105
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ use char;
2222
use char::Char;
2323
use clone::{Clone, DeepClone};
2424
use container::{Container, Mutable};
25-
use num::Times;
26-
use iter::{Iterator, FromIterator, Extendable};
25+
use iter::{Iterator, FromIterator, Extendable, range};
2726
use iter::{Filter, AdditiveIterator, Map};
2827
use iter::{Invert, DoubleEndedIterator, ExactSize};
2928
use libc;
@@ -33,7 +32,6 @@ use ptr;
3332
use ptr::RawPtr;
3433
use to_str::ToStr;
3534
use uint;
36-
use unstable::raw::{Repr, Slice};
3735
use vec;
3836
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
3937
use default::Default;
@@ -185,23 +183,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
185183
fn concat(&self) -> ~str {
186184
if self.is_empty() { return ~""; }
187185

186+
// `len` calculation may overflow but push_str but will check boundaries
188187
let len = self.iter().map(|s| s.as_slice().len()).sum();
189188

190-
let mut s = with_capacity(len);
189+
let mut result = with_capacity(len);
191190

192-
unsafe {
193-
do s.as_mut_buf |buf, _| {
194-
let mut buf = buf;
195-
for ss in self.iter() {
196-
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
197-
ptr::copy_memory(buf, ssbuf, sslen);
198-
buf = buf.offset(sslen as int);
199-
}
200-
}
201-
}
202-
raw::set_len(&mut s, len);
191+
for s in self.iter() {
192+
result.push_str(s.as_slice())
203193
}
204-
s
194+
result
205195
}
206196

207197
/// Concatenate a vector of strings, placing a given separator between each.
@@ -212,34 +202,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
212202
if sep.is_empty() { return self.concat(); }
213203

214204
// this is wrong without the guarantee that `self` is non-empty
205+
// `len` calculation may overflow but push_str but will check boundaries
215206
let len = sep.len() * (self.len() - 1)
216207
+ self.iter().map(|s| s.as_slice().len()).sum();
217-
let mut s = ~"";
208+
let mut result = with_capacity(len);
218209
let mut first = true;
219210

220-
s.reserve(len);
221-
222-
unsafe {
223-
do s.as_mut_buf |buf, _| {
224-
do sep.as_imm_buf |sepbuf, seplen| {
225-
let mut buf = buf;
226-
for ss in self.iter() {
227-
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
228-
if first {
229-
first = false;
230-
} else {
231-
ptr::copy_memory(buf, sepbuf, seplen);
232-
buf = buf.offset(seplen as int);
233-
}
234-
ptr::copy_memory(buf, ssbuf, sslen);
235-
buf = buf.offset(sslen as int);
236-
}
237-
}
238-
}
211+
for s in self.iter() {
212+
if first {
213+
first = false;
214+
} else {
215+
result.push_str(sep);
239216
}
240-
raw::set_len(&mut s, len);
217+
result.push_str(s.as_slice());
241218
}
242-
s
219+
result
243220
}
244221
}
245222

@@ -961,7 +938,6 @@ static TAG_CONT_U8: u8 = 128u8;
961938

962939
/// Unsafe operations
963940
pub mod raw {
964-
use option::Some;
965941
use cast;
966942
use libc;
967943
use ptr;
@@ -1064,21 +1040,22 @@ pub mod raw {
10641040
}
10651041
}
10661042

1067-
/// Appends a byte to a string. (Not UTF-8 safe).
1043+
/// Appends a byte to a string.
1044+
/// The caller must preserve the valid UTF-8 property.
10681045
#[inline]
10691046
pub unsafe fn push_byte(s: &mut ~str, b: u8) {
1070-
let v: &mut ~[u8] = cast::transmute(s);
1071-
v.push(b);
1047+
as_owned_vec(s).push(b)
10721048
}
10731049

1074-
/// Appends a vector of bytes to a string. (Not UTF-8 safe).
1075-
unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1076-
let new_len = s.len() + bytes.len();
1077-
s.reserve_at_least(new_len);
1078-
for byte in bytes.iter() { push_byte(&mut *s, *byte); }
1050+
/// Appends a vector of bytes to a string.
1051+
/// The caller must preserve the valid UTF-8 property.
1052+
#[inline]
1053+
pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1054+
vec::bytes::push_bytes(as_owned_vec(s), bytes);
10791055
}
10801056

1081-
/// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1057+
/// Removes the last byte from a string and returns it.
1058+
/// The caller must preserve the valid UTF-8 property.
10821059
pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
10831060
let len = s.len();
10841061
assert!((len > 0u));
@@ -1087,7 +1064,8 @@ pub mod raw {
10871064
return b;
10881065
}
10891066

1090-
/// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1067+
/// Removes the first byte from a string and returns it.
1068+
/// The caller must preserve the valid UTF-8 property.
10911069
pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
10921070
let len = s.len();
10931071
assert!((len > 0u));
@@ -1096,15 +1074,21 @@ pub mod raw {
10961074
return b;
10971075
}
10981076

1077+
/// Access the str in its vector representation.
1078+
/// The caller must preserve the valid UTF-8 property when modifying.
1079+
#[inline]
1080+
pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
1081+
cast::transmute(s)
1082+
}
1083+
10991084
/// Sets the length of a string
11001085
///
11011086
/// This will explicitly set the size of the string, without actually
11021087
/// modifying its buffers, so it is up to the caller to ensure that
11031088
/// the string is actually the specified size.
11041089
#[inline]
11051090
pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
1106-
let v: &mut ~[u8] = cast::transmute(s);
1107-
vec::raw::set_len(v, new_len)
1091+
vec::raw::set_len(as_owned_vec(s), new_len)
11081092
}
11091093

11101094
/// Sets the length of a string
@@ -2061,22 +2045,11 @@ impl<'self> StrSlice<'self> for &'self str {
20612045
20622046
/// Given a string, make a new string with repeated copies of it.
20632047
fn repeat(&self, nn: uint) -> ~str {
2064-
do self.as_imm_buf |buf, len| {
2065-
let mut ret = with_capacity(nn * len);
2066-
2067-
unsafe {
2068-
do ret.as_mut_buf |rbuf, _len| {
2069-
let mut rbuf = rbuf;
2070-
2071-
do nn.times {
2072-
ptr::copy_memory(rbuf, buf, len);
2073-
rbuf = rbuf.offset(len as int);
2074-
}
2075-
}
2076-
raw::set_len(&mut ret, nn * len);
2077-
}
2078-
ret
2048+
let mut ret = with_capacity(nn * self.len());
2049+
for _ in range(0, nn) {
2050+
ret.push_str(*self);
20792051
}
2052+
ret
20802053
}
20812054
20822055
/// Retrieves the first character from a string slice and returns
@@ -2199,54 +2172,35 @@ impl OwnedStr for ~str {
21992172
/// Appends a string slice to the back of a string, without overallocating
22002173
#[inline]
22012174
fn push_str_no_overallocate(&mut self, rhs: &str) {
2202-
unsafe {
2203-
let llen = self.len();
2204-
let rlen = rhs.len();
2205-
self.reserve(llen + rlen);
2206-
do self.as_imm_buf |lbuf, _llen| {
2207-
do rhs.as_imm_buf |rbuf, _rlen| {
2208-
let dst = ptr::offset(lbuf, llen as int);
2209-
let dst = cast::transmute_mut_unsafe(dst);
2210-
ptr::copy_memory(dst, rbuf, rlen);
2211-
}
2212-
}
2213-
raw::set_len(self, llen + rlen);
2214-
}
2175+
let new_cap = self.len() + rhs.len();
2176+
self.reserve(new_cap);
2177+
self.push_str(rhs);
22152178
}
22162179
22172180
/// Appends a string slice to the back of a string
22182181
#[inline]
22192182
fn push_str(&mut self, rhs: &str) {
22202183
unsafe {
2221-
let llen = self.len();
2222-
let rlen = rhs.len();
2223-
self.reserve_at_least(llen + rlen);
2224-
do self.as_imm_buf |lbuf, _llen| {
2225-
do rhs.as_imm_buf |rbuf, _rlen| {
2226-
let dst = ptr::offset(lbuf, llen as int);
2227-
let dst = cast::transmute_mut_unsafe(dst);
2228-
ptr::copy_memory(dst, rbuf, rlen);
2229-
}
2230-
}
2231-
raw::set_len(self, llen + rlen);
2184+
raw::push_bytes(self, rhs.as_bytes());
22322185
}
22332186
}
22342187
22352188
/// Appends a character to the back of a string
22362189
#[inline]
22372190
fn push_char(&mut self, c: char) {
22382191
let cur_len = self.len();
2239-
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
2240-
2241-
// Attempt to not use an intermediate buffer by just pushing bytes
2242-
// directly onto this string.
2192+
// may use up to 4 bytes.
22432193
unsafe {
2244-
let v = self.repr();
2245-
let len = c.encode_utf8(cast::transmute(Slice {
2246-
data: ((&(*v).data) as *u8).offset(cur_len as int),
2247-
len: 4,
2248-
}));
2249-
raw::set_len(self, cur_len + len);
2194+
raw::as_owned_vec(self).reserve_additional(4);
2195+
2196+
// Attempt to not use an intermediate buffer by just pushing bytes
2197+
// directly onto this string.
2198+
let used = do self.as_mut_buf |buf, _| {
2199+
do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
2200+
c.encode_utf8(slc)
2201+
}
2202+
};
2203+
raw::set_len(self, cur_len + used);
22502204
}
22512205
}
22522206
@@ -2306,8 +2260,7 @@ impl OwnedStr for ~str {
23062260
#[inline]
23072261
fn reserve(&mut self, n: uint) {
23082262
unsafe {
2309-
let v: &mut ~[u8] = cast::transmute(self);
2310-
(*v).reserve(n);
2263+
raw::as_owned_vec(self).reserve(n)
23112264
}
23122265
}
23132266
@@ -2329,7 +2282,7 @@ impl OwnedStr for ~str {
23292282
/// * n - The number of bytes to reserve space for
23302283
#[inline]
23312284
fn reserve_at_least(&mut self, n: uint) {
2332-
self.reserve(uint::next_power_of_two(n))
2285+
self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n))
23332286
}
23342287
23352288
/// Returns the number of single-byte characters the string can hold without
@@ -2359,8 +2312,9 @@ impl OwnedStr for ~str {
23592312
23602313
#[inline]
23612314
fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2362-
let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2363-
v.as_mut_buf(f)
2315+
unsafe {
2316+
raw::as_owned_vec(self).as_mut_buf(f)
2317+
}
23642318
}
23652319
}
23662320
@@ -3912,4 +3866,23 @@ mod bench {
39123866
with_capacity(100);
39133867
}
39143868
}
3869+
3870+
#[bench]
3871+
fn bench_push_str(bh: &mut BenchHarness) {
3872+
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3873+
do bh.iter {
3874+
let mut r = ~"";
3875+
r.push_str(s);
3876+
}
3877+
}
3878+
3879+
#[bench]
3880+
fn bench_connect(bh: &mut BenchHarness) {
3881+
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3882+
let sep = "→";
3883+
let v = [s, s, s, s, s, s, s, s, s, s];
3884+
do bh.iter {
3885+
assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
3886+
}
3887+
}
39153888
}

0 commit comments

Comments
 (0)