diff --git a/Cargo.toml b/Cargo.toml index d3820e1a5..a95afc870 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "url" -version = "0.4.0" +version = "0.5.0" authors = [ "Simon Sapin " ] description = "URL library for Rust, based on the WHATWG URL Standard" diff --git a/src/host.rs b/src/host.rs index c65aa6d33..41702e2d7 100644 --- a/src/host.rs +++ b/src/host.rs @@ -9,6 +9,7 @@ use std::ascii::AsciiExt; use std::cmp; use std::fmt::{self, Formatter}; +use std::net::{Ipv4Addr, Ipv6Addr}; use parser::{ParseResult, ParseError}; use percent_encoding::{from_hex, percent_decode}; @@ -17,26 +18,15 @@ use percent_encoding::{from_hex, percent_decode}; #[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)] #[cfg_attr(feature="heap_size", derive(HeapSizeOf))] pub enum Host { - /// A (DNS) domain name or an IPv4 address. - /// - /// FIXME: IPv4 probably should be a separate variant. - /// See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26431 + /// A (DNS) domain name. Domain(String), - + /// An IPv4 address, represented by four sequences of up to three ASCII digits. + Ipv4(Ipv4Addr), /// An IPv6 address, represented inside `[...]` square brackets /// so that `:` colon characters in the address are not ambiguous /// with the port number delimiter. - Ipv6(Ipv6Address), -} - - -/// A 128 bit IPv6 address -#[derive(Clone, Eq, PartialEq, Copy, Debug, Hash, PartialOrd, Ord)] -pub struct Ipv6Address { - pub pieces: [u16; 8] + Ipv6(Ipv6Addr), } -#[cfg(feature="heap_size")] -known_heap_size!(0, Ipv6Address); impl Host { @@ -48,26 +38,28 @@ impl Host { /// FIXME: Add IDNA support for non-ASCII domains. 
pub fn parse(input: &str) -> ParseResult { if input.len() == 0 { - Err(ParseError::EmptyHost) - } else if input.starts_with("[") { - if input.ends_with("]") { - Ipv6Address::parse(&input[1..input.len() - 1]).map(Host::Ipv6) - } else { - Err(ParseError::InvalidIpv6Address) - } - } else { - let decoded = percent_decode(input.as_bytes()); - let domain = String::from_utf8_lossy(&decoded); - // TODO: Remove this check and use IDNA "domain to ASCII" - if !domain.is_ascii() { - Err(ParseError::NonAsciiDomainsNotSupportedYet) - } else if domain.find(&[ - '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']' - ][..]).is_some() { - Err(ParseError::InvalidDomainCharacter) - } else { - Ok(Host::Domain(domain.to_ascii_lowercase())) + return Err(ParseError::EmptyHost) + } + if input.starts_with("[") { + if !input.ends_with("]") { + return Err(ParseError::InvalidIpv6Address) } + return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6) + } + let decoded = percent_decode(input.as_bytes()); + let domain = String::from_utf8_lossy(&decoded); + // TODO: Remove this check and use IDNA "domain to ASCII" + if !domain.is_ascii() { + return Err(ParseError::NonAsciiDomainsNotSupportedYet) + } else if domain.find(&[ + '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']' + ][..]).is_some() { + return Err(ParseError::InvalidDomainCharacter) + } + match parse_ipv4addr(&domain[..]) { + Ok(Some(ipv4addr)) => Ok(Host::Ipv4(ipv4addr)), + Ok(None) => Ok(Host::Domain(domain.to_ascii_lowercase())), + Err(e) => Err(e), } } @@ -81,203 +73,186 @@ impl Host { impl fmt::Display for Host { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { match *self { - Host::Domain(ref domain) => domain.fmt(formatter), - Host::Ipv6(ref address) => { - try!(formatter.write_str("[")); - try!(address.fmt(formatter)); - formatter.write_str("]") - } + Host::Domain(ref domain) => domain.fmt(f), + Host::Ipv4(ref addr) 
=> addr.fmt(f), + Host::Ipv6(ref addr) => write!(f, "[{}]", addr), + } + } +} + +fn parse_ipv4number(mut input: &str) -> ParseResult { + let mut r = 10; + if input.starts_with("0x") || input.starts_with("0X") { + input = &input[2..]; + r = 16; + } else if input.len() >= 2 && input.starts_with("0") { + input = &input[1..]; + r = 8; + } + if input.is_empty() { + return Ok(0); + } + match u32::from_str_radix(&input, r) { + Ok(number) => return Ok(number), + Err(_) => Err(ParseError::InvalidIpv4Address), + } +} + +fn parse_ipv4addr(input: &str) -> ParseResult> { + let mut parts: Vec<&str> = input.split('.').collect(); + if parts.last() == Some(&"") { + parts.pop(); + } + if parts.len() > 4 { + return Ok(None); + } + let mut numbers: Vec = Vec::new(); + for part in parts { + if part == "" { + return Ok(None); } + if let Ok(n) = parse_ipv4number(part) { + numbers.push(n); + } else { + return Ok(None); + } + } + let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); + if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { + return Err(ParseError::InvalidIpv4Address); } + if numbers.iter().any(|x| *x > 255) { + return Err(ParseError::InvalidIpv4Address); + } + for (counter, n) in numbers.iter().enumerate() { + ipv4 += n << (8 * (3 - counter as u32)) + } + Ok(Some(Ipv4Addr::from(ipv4))) } -impl Ipv6Address { - /// Parse an IPv6 address, without the [] square brackets. 
- pub fn parse(input: &str) -> ParseResult { - let input = input.as_bytes(); - let len = input.len(); - let mut is_ip_v4 = false; - let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; - let mut piece_pointer = 0; - let mut compress_pointer = None; - let mut i = 0; +fn parse_ipv6addr(input: &str) -> ParseResult { + let input = input.as_bytes(); + let len = input.len(); + let mut is_ip_v4 = false; + let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; + let mut piece_pointer = 0; + let mut compress_pointer = None; + let mut i = 0; + + if len < 2 { + return Err(ParseError::InvalidIpv6Address) + } - if len < 2 { + if input[0] == b':' { + if input[1] != b':' { return Err(ParseError::InvalidIpv6Address) } + i = 2; + piece_pointer = 1; + compress_pointer = Some(1); + } - if input[0] == b':' { - if input[1] != b':' { - return Err(ParseError::InvalidIpv6Address) - } - i = 2; - piece_pointer = 1; - compress_pointer = Some(1); + while i < len { + if piece_pointer == 8 { + return Err(ParseError::InvalidIpv6Address) } - - while i < len { - if piece_pointer == 8 { + if input[i] == b':' { + if compress_pointer.is_some() { return Err(ParseError::InvalidIpv6Address) } - if input[i] == b':' { - if compress_pointer.is_some() { - return Err(ParseError::InvalidIpv6Address) - } - i += 1; - piece_pointer += 1; - compress_pointer = Some(piece_pointer); - continue - } - let start = i; - let end = cmp::min(len, start + 4); - let mut value = 0u16; - while i < end { - match from_hex(input[i]) { - Some(digit) => { - value = value * 0x10 + digit as u16; - i += 1; - }, - None => break - } - } - if i < len { - match input[i] { - b'.' 
=> { - if i == start { - return Err(ParseError::InvalidIpv6Address) - } - i = start; - is_ip_v4 = true; - }, - b':' => { - i += 1; - if i == len { - return Err(ParseError::InvalidIpv6Address) - } - }, - _ => return Err(ParseError::InvalidIpv6Address) - } - } - if is_ip_v4 { - break - } - pieces[piece_pointer] = value; + i += 1; piece_pointer += 1; + compress_pointer = Some(piece_pointer); + continue } - - if is_ip_v4 { - if piece_pointer > 6 { - return Err(ParseError::InvalidIpv6Address) + let start = i; + let end = cmp::min(len, start + 4); + let mut value = 0u16; + while i < end { + match from_hex(input[i]) { + Some(digit) => { + value = value * 0x10 + digit as u16; + i += 1; + }, + None => break } - let mut dots_seen = 0; - while i < len { - // FIXME: https://github.com/whatwg/url/commit/1c22aa119c354e0020117e02571cec53f7c01064 - let mut value = 0u16; - while i < len { - let digit = match input[i] { - c @ b'0' ... b'9' => c - b'0', - _ => break - }; - value = value * 10 + digit as u16; - if value == 0 || value > 255 { + } + if i < len { + match input[i] { + b'.' 
=> { + if i == start { return Err(ParseError::InvalidIpv6Address) } - } - if dots_seen < 3 && !(i < len && input[i] == b'.') { - return Err(ParseError::InvalidIpv6Address) - } - pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value; - if dots_seen == 0 || dots_seen == 2 { - piece_pointer += 1; - } - i += 1; - if dots_seen == 3 && i < len { - return Err(ParseError::InvalidIpv6Address) - } - dots_seen += 1; + i = start; + is_ip_v4 = true; + }, + b':' => { + i += 1; + if i == len { + return Err(ParseError::InvalidIpv6Address) + } + }, + _ => return Err(ParseError::InvalidIpv6Address) } } - - match compress_pointer { - Some(compress_pointer) => { - let mut swaps = piece_pointer - compress_pointer; - piece_pointer = 7; - while swaps > 0 { - pieces[piece_pointer] = pieces[compress_pointer + swaps - 1]; - pieces[compress_pointer + swaps - 1] = 0; - swaps -= 1; - piece_pointer -= 1; - } - } - _ => if piece_pointer != 8 { - return Err(ParseError::InvalidIpv6Address) - } + if is_ip_v4 { + break } - Ok(Ipv6Address { pieces: pieces }) + pieces[piece_pointer] = value; + piece_pointer += 1; } - /// Serialize the IPv6 address to a string. - pub fn serialize(&self) -> String { - self.to_string() - } -} - - -impl fmt::Display for Ipv6Address { - fn fmt(&self, formatter: &mut Formatter) -> fmt::Result { - let (compress_start, compress_end) = longest_zero_sequence(&self.pieces); - let mut i = 0; - while i < 8 { - if i == compress_start { - try!(formatter.write_str(":")); - if i == 0 { - try!(formatter.write_str(":")); - } - if compress_end < 8 { - i = compress_end; - } else { - break; + if is_ip_v4 { + if piece_pointer > 6 { + return Err(ParseError::InvalidIpv6Address) + } + let mut dots_seen = 0; + while i < len { + // FIXME: https://github.com/whatwg/url/commit/1c22aa119c354e0020117e02571cec53f7c01064 + let mut value = 0u16; + while i < len { + let digit = match input[i] { + c @ b'0' ... 
b'9' => c - b'0', + _ => break + }; + value = value * 10 + digit as u16; + if value == 0 || value > 255 { + return Err(ParseError::InvalidIpv6Address) } } - try!(write!(formatter, "{:x}", self.pieces[i as usize])); - if i < 7 { - try!(formatter.write_str(":")); + if dots_seen < 3 && !(i < len && input[i] == b'.') { + return Err(ParseError::InvalidIpv6Address) + } + pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value; + if dots_seen == 0 || dots_seen == 2 { + piece_pointer += 1; } i += 1; + if dots_seen == 3 && i < len { + return Err(ParseError::InvalidIpv6Address) + } + dots_seen += 1; } - Ok(()) } -} - -fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { - let mut longest = -1; - let mut longest_length = -1; - let mut start = -1; - macro_rules! finish_sequence( - ($end: expr) => { - if start >= 0 { - let length = $end - start; - if length > longest_length { - longest = start; - longest_length = length; - } - } - }; - ); - for i in 0..8 { - if pieces[i as usize] == 0 { - if start < 0 { - start = i; + match compress_pointer { + Some(compress_pointer) => { + let mut swaps = piece_pointer - compress_pointer; + piece_pointer = 7; + while swaps > 0 { + pieces[piece_pointer] = pieces[compress_pointer + swaps - 1]; + pieces[compress_pointer + swaps - 1] = 0; + swaps -= 1; + piece_pointer -= 1; } - } else { - finish_sequence!(i); - start = -1; + } + _ => if piece_pointer != 8 { + return Err(ParseError::InvalidIpv6Address) } } - finish_sequence!(8); - (longest, longest + longest_length) + Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3], + pieces[4], pieces[5], pieces[6], pieces[7])) } diff --git a/src/lib.rs b/src/lib.rs index 08d32ef55..d46810857 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,7 +143,7 @@ use std::cmp::Ordering; #[cfg(feature="serde_serialization")] use std::str::FromStr; -pub use host::{Host, Ipv6Address}; +pub use host::Host; pub use parser::{ErrorHandler, ParseResult, ParseError}; use percent_encoding::{percent_encode, 
lossy_utf8_percent_decode, DEFAULT_ENCODE_SET}; @@ -1140,4 +1140,3 @@ fn file_url_path_to_pathbuf_windows(path: &[String]) -> Result { "to_file_path() failed to produce an absolute Path"); Ok(path) } - diff --git a/src/parser.rs b/src/parser.rs index b03023511..68b28d78b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -47,6 +47,7 @@ simple_enum_error! { EmptyHost => "empty host", InvalidScheme => "invalid scheme", InvalidPort => "invalid port number", + InvalidIpv4Address => "invalid IPv4 address", InvalidIpv6Address => "invalid IPv6 address", InvalidDomainCharacter => "invalid domain character", InvalidCharacter => "invalid character", diff --git a/src/tests.rs b/src/tests.rs index e25500f5c..c5d12c2c4 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -8,6 +8,7 @@ use std::char; +use std::net::{Ipv4Addr, Ipv6Addr}; use super::{UrlParser, Url, SchemeData, RelativeSchemeData, Host}; @@ -347,3 +348,21 @@ fn relative_scheme_data_equality() { let b: Url = url("http://foo.com/"); check_eq(&a, &b); } + +#[test] +fn host() { + let a = Host::parse("www.mozilla.org").unwrap(); + let b = Host::parse("1.35.33.49").unwrap(); + let c = Host::parse("[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]").unwrap(); + assert_eq!(a, Host::Domain("www.mozilla.org".to_owned())); + assert_eq!(b, Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); + assert_eq!(c, Host::Ipv6(Ipv6Addr::new(0x2001, 0x0db8, 0x85a3, 0x08d3, + 0x1319, 0x8a2e, 0x0370, 0x7344))); + assert_eq!(Host::parse("[::]").unwrap(), Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0))); + assert_eq!(Host::parse("[::1]").unwrap(), Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1))); + assert_eq!(Host::parse("0x1.0X23.0x21.061").unwrap(), Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); + assert_eq!(Host::parse("0x1232131").unwrap(), Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49))); + assert!(Host::parse("42.0x1232131").is_err()); + assert_eq!(Host::parse("111").unwrap(), Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111))); + assert_eq!(Host::parse("2..2.3").unwrap(), 
Host::Domain("2..2.3".to_owned())); +} diff --git a/src/urltestdata.txt b/src/urltestdata.txt index ece4e7131..04ea893f1 100644 --- a/src/urltestdata.txt +++ b/src/urltestdata.txt @@ -162,7 +162,7 @@ http://www.google.com/foo?bar=baz# about:blank s:http h:www.google.com p:/foo q: http://www.google.com/foo?bar=baz#\s\u00BB s:http h:www.google.com p:/foo q:?bar=baz f:#\s%C2%BB http://[www.google.com]/ http://www.google.com s:http h:www.google.com p:/ -http://192.0x00A80001 s:http h:192.0x00a80001 p:/ +http://192.0x00A80001 s:http h:192.168.0.1 p:/ http://www/foo%2Ehtml s:http h:www p:/foo%2Ehtml http://www/foo/%2E/html s:http h:www p:/foo/html http://user:pass@/