Skip to content

Commit 41af3e1

Browse files
committed
Refactor URL sanitization code for better readability.
1 parent b60508a commit 41af3e1

File tree

1 file changed

+71
-76
lines changed

1 file changed

+71
-76
lines changed

src/render.rs

Lines changed: 71 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use ammonia::{Builder, UrlRelative};
1+
//! Render README files to HTML.
2+
3+
use ammonia::{Builder, UrlRelative, UrlRelativeEvaluate};
24
use htmlescape::encode_minimal;
35
use std::borrow::Cow;
46
use std::path::Path;
@@ -83,89 +85,16 @@ impl<'a> MarkdownRenderer<'a> {
8385
"yaml",
8486
]),
8587
)]);
86-
87-
let sanitizer_base_url = base_url.map(ToString::to_string);
88-
89-
// Constrain the type of the closures given to the HTML sanitizer.
90-
fn constrain_closure<F>(f: F) -> F
91-
where
92-
F: for<'a> Fn(&'a str) -> Option<Cow<'a, str>> + Send + Sync,
93-
{
94-
f
95-
}
96-
97-
let unrelative_url_sanitizer = constrain_closure(|url| {
98-
// We have no base URL; allow fragment links only.
99-
if url.starts_with('#') {
100-
return Some(Cow::Borrowed(url));
101-
}
102-
103-
None
104-
});
105-
106-
fn is_media_url(url: &str) -> bool {
107-
Path::new(url)
108-
.extension()
109-
.and_then(std::ffi::OsStr::to_str)
110-
.map_or(false, |e| match e {
111-
"png" | "svg" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" => true,
112-
_ => false,
113-
})
114-
}
115-
116-
let relative_url_sanitizer = constrain_closure(move |url| {
117-
// sanitizer_base_url is Some(String); use it to fix the relative URL.
118-
if url.starts_with('#') {
119-
return Some(Cow::Borrowed(url));
120-
}
121-
122-
let mut new_url = sanitizer_base_url.clone().unwrap();
123-
if !new_url.ends_with('/') {
124-
new_url.push('/');
125-
}
126-
if new_url.ends_with(".git/") {
127-
let offset = new_url.len() - 5;
128-
new_url.drain(offset..offset + 4);
129-
}
130-
// Assumes GitHub’s URL scheme. GitHub renders text and markdown
131-
// better in the "blob" view, but images need to be served raw.
132-
new_url += if is_media_url(url) {
133-
"raw/master"
134-
} else {
135-
"blob/master"
136-
};
137-
if !url.starts_with('/') {
138-
new_url.push('/');
139-
}
140-
new_url += url;
141-
Some(Cow::Owned(new_url))
142-
});
143-
144-
let use_relative = if let Some(base_url) = base_url {
145-
if let Ok(url) = Url::parse(base_url) {
146-
url.host_str() == Some("github.com")
147-
|| url.host_str() == Some("gitlab.com")
148-
|| url.host_str() == Some("bitbucket.org")
149-
} else {
150-
false
151-
}
152-
} else {
153-
false
154-
};
88+
let sanitize_url = UrlRelative::Custom(Box::new(SanitizeUrl::new(base_url)));
15589

15690
let mut html_sanitizer = Builder::new();
15791
html_sanitizer
15892
.link_rel(Some("nofollow noopener noreferrer"))
15993
.tags(tags)
16094
.tag_attributes(tag_attributes)
16195
.allowed_classes(allowed_classes)
162-
.url_relative(if use_relative {
163-
UrlRelative::Custom(Box::new(relative_url_sanitizer))
164-
} else {
165-
UrlRelative::Custom(Box::new(unrelative_url_sanitizer))
166-
})
96+
.url_relative(sanitize_url)
16797
.id_prefix(Some("user-content-"));
168-
16998
MarkdownRenderer { html_sanitizer }
17099
}
171100

@@ -186,6 +115,72 @@ impl<'a> MarkdownRenderer<'a> {
186115
}
187116
}
188117

118+
/// Canonicalize a repository base URL so that paths can be appended to it.
///
/// A `/` is appended when `base_url` does not already end with one, and a
/// trailing `.git` path suffix (as in `https://host/owner/repo.git`) is
/// removed. The returned string always ends with `/`.
fn canon_base_url(mut base_url: String) -> String {
    // Suffix to strip, including the slash that is guaranteed to be present
    // by the time the suffix is checked.
    const GIT_SUFFIX: &str = ".git/";

    if !base_url.ends_with('/') {
        base_url.push('/');
    }
    if base_url.ends_with(GIT_SUFFIX) {
        // Cut the whole `.git/` suffix, then restore the trailing slash.
        // `ends_with` guarantees the subtraction cannot underflow and that
        // the cut lands on a character boundary.
        let stem_len = base_url.len() - GIT_SUFFIX.len();
        base_url.truncate(stem_len);
        base_url.push('/');
    }
    base_url
}
129+
130+
/// Sanitize relative URLs in README files.
131+
struct SanitizeUrl {
132+
base_url: Option<String>,
133+
}
134+
135+
impl SanitizeUrl {
136+
fn new(base_url: Option<&str>) -> Self {
137+
let base_url = base_url
138+
.and_then(|base_url| Url::parse(base_url).ok())
139+
.and_then(|url| match url.host_str() {
140+
Some("github.com") | Some("gitlab.com") | Some("bitbucket.org") => {
141+
Some(canon_base_url(url.into_string()))
142+
}
143+
_ => None,
144+
});
145+
Self { base_url }
146+
}
147+
}
148+
149+
/// Determine whether the given URL has a media file extension.
///
/// The extension is whatever `Path::extension` reports for `url`, and it is
/// compared ASCII case-insensitively against the known image/video
/// extensions, so `logo.PNG` is classified the same way as `logo.png`.
/// Returns `false` when there is no extension.
fn is_media_url(url: &str) -> bool {
    // Extensions that are treated as media files.
    const MEDIA_EXTENSIONS: [&str; 8] = [
        "png", "svg", "jpg", "jpeg", "gif", "mp4", "webm", "ogg",
    ];

    Path::new(url)
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .map_or(false, |ext| {
            MEDIA_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
159+
160+
impl UrlRelativeEvaluate for SanitizeUrl {
161+
fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
162+
if url.starts_with('#') {
163+
// Always allow fragment URLs.
164+
return Some(Cow::Borrowed(url));
165+
}
166+
self.base_url.as_ref().map(|base_url| {
167+
let mut new_url = base_url.clone();
168+
// Assumes GitHub’s URL scheme. GitHub renders text and markdown
169+
// better in the "blob" view, but images need to be served raw.
170+
new_url += if is_media_url(url) {
171+
"raw/master"
172+
} else {
173+
"blob/master"
174+
};
175+
if !url.starts_with('/') {
176+
new_url.push('/');
177+
}
178+
new_url += url;
179+
Cow::Owned(new_url)
180+
})
181+
}
182+
}
183+
189184
/// Renders Markdown text to sanitized HTML with a given `base_url`.
190185
/// See `readme_to_html` for the interpretation of `base_url`.
191186
fn markdown_to_html(text: &str, base_url: Option<&str>) -> CargoResult<String> {

0 commit comments

Comments
 (0)