Skip to content

Commit b08a655

Browse files
authored
Normalize URL paths: convert /.//p, /..//p, and //p to p (#943)
1 parent ebd5cfb commit b08a655

File tree

2 files changed

+53
-5
lines changed

2 files changed

+53
-5
lines changed

url/src/lib.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1742,6 +1742,39 @@ impl Url {
17421742
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
17431743
let cannot_be_a_base = self.cannot_be_a_base();
17441744
let scheme_type = SchemeType::from(self.scheme());
1745+
let mut path_empty = false;
1746+
1747+
// Find the first ':' and then check whether the next character is '/'
1748+
let mut has_host = if let Some(index) = self.serialization.find(":") {
1749+
if self.serialization.len() > index + 1
1750+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1751+
{
1752+
let rest = &self.serialization[(index + ":/".len())..];
1753+
let host_part = rest.split('/').next().unwrap_or("");
1754+
path_empty = rest.is_empty();
1755+
!host_part.is_empty() && !host_part.contains('@')
1756+
} else {
1757+
false
1758+
}
1759+
} else {
1760+
false
1761+
};
1762+
1763+
// Ensure the path length is greater than 1 to account
1764+
// for cases where "/." is already appended from serialization
1765+
// If we set path, then we already checked the other two conditions:
1766+
// https://url.spec.whatwg.org/#url-serializing
1767+
// 1. The host is null
1768+
// 2. The first segment of the URL's path is an empty string
1769+
if path.len() > 1 {
1770+
if let Some(index) = self.serialization.find(":") {
1771+
let removal_start = index + ":".len();
1772+
if self.serialization[removal_start..].starts_with("/.") {
1773+
self.path_start -= "/.".len() as u32;
1774+
}
1775+
}
1776+
}
1777+
17451778
self.serialization.truncate(self.path_start as usize);
17461779
self.mutate(|parser| {
17471780
if cannot_be_a_base {
@@ -1751,14 +1784,33 @@ impl Url {
17511784
}
17521785
parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
17531786
} else {
1754-
let mut has_host = true; // FIXME
17551787
parser.parse_path_start(
17561788
scheme_type,
17571789
&mut has_host,
17581790
parser::Input::new_no_trim(path),
17591791
);
17601792
}
17611793
});
1794+
1795+
// For cases where normalization is applied across both the serialization and the path.
1796+
// Insert "/." immediately after the ":" that follows the scheme
1797+
// This is done if three conditions are met.
1798+
// https://url.spec.whatwg.org/#url-serializing
1799+
// 1. The host is null
1800+
// 2. The URL's path length is greater than 1
1801+
// 3. The first segment of the URL's path is an empty string
1802+
if !has_host && path.len() > 1 && path_empty {
1803+
if let Some(index) = self.serialization.find(":") {
1804+
if self.serialization.len() > index + 2
1805+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1806+
&& self.serialization.as_bytes().get(index + 2) == Some(&b'/')
1807+
{
1808+
self.serialization.insert_str(index + ":".len(), "/.");
1809+
self.path_start += "/.".len() as u32;
1810+
}
1811+
}
1812+
}
1813+
17621814
self.restore_after_path(old_after_path_pos, &after_path);
17631815
}
17641816

url/tests/expected_failures.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,3 @@
4343
<file://monkey/> set pathname to <\\\\>
4444
<file:///unicorn> set pathname to <//\\/>
4545
<file:///unicorn> set pathname to <//monkey/..//>
46-
<non-spec:/> set pathname to </.//p>
47-
<non-spec:/> set pathname to </..//p>
48-
<non-spec:/> set pathname to <//p>
49-
<non-spec:/.//> set pathname to <p>

0 commit comments

Comments
 (0)