-use ammonia::{Builder, UrlRelative};
+//! Render README files to HTML.
+
+use ammonia::{Builder, UrlRelative, UrlRelativeEvaluate};
 use htmlescape::encode_minimal;
 use std::borrow::Cow;
 use std::path::Path;
@@ -20,7 +22,7 @@ impl<'a> MarkdownRenderer<'a> {
     /// Per `readme_to_html`, `base_url` is the base URL prepended to any
     /// relative links in the input document. See that function for more detail.
     fn new(base_url: Option<&'a str>) -> MarkdownRenderer<'a> {
-        let tags = [
+        let tags = hashset(&[
             "a",
             "b",
             "blockquote",
@@ -60,30 +62,15 @@ impl<'a> MarkdownRenderer<'a> {
             "ul",
             "hr",
             "span",
-        ]
-        .iter()
-        .cloned()
-        .collect();
-        let tag_attributes = [
-            ("a", ["href", "id", "target"].iter().cloned().collect()),
-            (
-                "img",
-                ["width", "height", "src", "alt", "align"]
-                    .iter()
-                    .cloned()
-                    .collect(),
-            ),
-            (
-                "input",
-                ["checked", "disabled", "type"].iter().cloned().collect(),
-            ),
-        ]
-        .iter()
-        .cloned()
-        .collect();
-        let allowed_classes = [(
+        ]);
+        let tag_attributes = hashmap(&[
+            ("a", hashset(&["href", "id", "target"])),
+            ("img", hashset(&["width", "height", "src", "alt", "align"])),
+            ("input", hashset(&["checked", "disabled", "type"])),
+        ]);
+        let allowed_classes = hashmap(&[(
             "code",
-            [
+            hashset(&[
                 "language-bash",
                 "language-clike",
                 "language-glsl",
@@ -98,97 +85,18 @@ impl<'a> MarkdownRenderer<'a> {
                 "language-scss",
                 "language-sql",
                 "yaml",
-            ]
-            .iter()
-            .cloned()
-            .collect(),
-        )]
-        .iter()
-        .cloned()
-        .collect();
-
-        let sanitizer_base_url = base_url.map(ToString::to_string);
-
-        // Constrain the type of the closures given to the HTML sanitizer.
-        fn constrain_closure<F>(f: F) -> F
-        where
-            F: for<'a> Fn(&'a str) -> Option<Cow<'a, str>> + Send + Sync,
-        {
-            f
-        }
-
-        let unrelative_url_sanitizer = constrain_closure(|url| {
-            // We have no base URL; allow fragment links only.
-            if url.starts_with('#') {
-                return Some(Cow::Borrowed(url));
-            }
-
-            None
-        });
-
-        fn is_media_url(url: &str) -> bool {
-            Path::new(url)
-                .extension()
-                .and_then(std::ffi::OsStr::to_str)
-                .map_or(false, |e| match e {
-                    "png" | "svg" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" => true,
-                    _ => false,
-                })
-        }
-
-        let relative_url_sanitizer = constrain_closure(move |url| {
-            // sanitizer_base_url is Some(String); use it to fix the relative URL.
-            if url.starts_with('#') {
-                return Some(Cow::Borrowed(url));
-            }
-
-            let mut new_url = sanitizer_base_url.clone().unwrap();
-            if !new_url.ends_with('/') {
-                new_url.push('/');
-            }
-            if new_url.ends_with(".git/") {
-                let offset = new_url.len() - 5;
-                new_url.drain(offset..offset + 4);
-            }
-            // Assumes GitHub’s URL scheme. GitHub renders text and markdown
-            // better in the "blob" view, but images need to be served raw.
-            new_url += if is_media_url(url) {
-                "raw/master"
-            } else {
-                "blob/master"
-            };
-            if !url.starts_with('/') {
-                new_url.push('/');
-            }
-            new_url += url;
-            Some(Cow::Owned(new_url))
-        });
-
-        let use_relative = if let Some(base_url) = base_url {
-            if let Ok(url) = Url::parse(base_url) {
-                url.host_str() == Some("github.com")
-                    || url.host_str() == Some("gitlab.com")
-                    || url.host_str() == Some("bitbucket.org")
-            } else {
-                false
-            }
-        } else {
-            false
-        };
+            ]),
+        )]);
+        let sanitize_url = UrlRelative::Custom(Box::new(SanitizeUrl::new(base_url)));

         let mut html_sanitizer = Builder::new();
         html_sanitizer
             .link_rel(Some("nofollow noopener noreferrer"))
             .tags(tags)
             .tag_attributes(tag_attributes)
             .allowed_classes(allowed_classes)
-            .url_relative(if use_relative {
-                UrlRelative::Custom(Box::new(relative_url_sanitizer))
-            } else {
-                UrlRelative::Custom(Box::new(unrelative_url_sanitizer))
-            })
+            .url_relative(sanitize_url)
             .id_prefix(Some("user-content-"));
-
         MarkdownRenderer { html_sanitizer }
     }

@@ -209,6 +117,72 @@ impl<'a> MarkdownRenderer<'a> {
     }
 }

+/// Add trailing slash and remove `.git` suffix of base URL.
+fn canon_base_url(mut base_url: String) -> String {
+    if !base_url.ends_with('/') {
+        base_url.push('/');
+    }
+    if base_url.ends_with(".git/") {
+        let offset = base_url.len() - 5;
+        base_url.drain(offset..offset + 4);
+    }
+    base_url
+}
+
+/// Sanitize relative URLs in README files.
+struct SanitizeUrl {
+    base_url: Option<String>,
+}
+
+impl SanitizeUrl {
+    fn new(base_url: Option<&str>) -> Self {
+        let base_url = base_url
+            .and_then(|base_url| Url::parse(base_url).ok())
+            .and_then(|url| match url.host_str() {
+                Some("github.com") | Some("gitlab.com") | Some("bitbucket.org") => {
+                    Some(canon_base_url(url.into_string()))
+                }
+                _ => None,
+            });
+        Self { base_url }
+    }
+}
+
+/// Determine whether the given URL has a media file extension.
+fn is_media_url(url: &str) -> bool {
+    Path::new(url)
+        .extension()
+        .and_then(std::ffi::OsStr::to_str)
+        .map_or(false, |e| match e {
+            "png" | "svg" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" => true,
+            _ => false,
+        })
+}
+
+impl UrlRelativeEvaluate for SanitizeUrl {
+    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
+        if url.starts_with('#') {
+            // Always allow fragment URLs.
+            return Some(Cow::Borrowed(url));
+        }
+        self.base_url.as_ref().map(|base_url| {
+            let mut new_url = base_url.clone();
+            // Assumes GitHub’s URL scheme. GitHub renders text and markdown
+            // better in the "blob" view, but images need to be served raw.
+            new_url += if is_media_url(url) {
+                "raw/master"
+            } else {
+                "blob/master"
+            };
+            if !url.starts_with('/') {
+                new_url.push('/');
+            }
+            new_url += url;
+            Cow::Owned(new_url)
+        })
+    }
+}
+
 /// Renders Markdown text to sanitized HTML with a given `base_url`.
 /// See `readme_to_html` for the interpretation of `base_url`.
 fn markdown_to_html(text: &str, base_url: Option<&str>) -> String {
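As a quick reference, here is a minimal sketch (not part of this commit) of how the new `SanitizeUrl` evaluator is expected to rewrite README links; the `rust-lang/test` repository URL is a made-up example:

```rust
#[test]
fn sanitize_url_rewrites_relative_links() {
    // Hypothetical repository URL; `canon_base_url` strips `.git` and adds a trailing slash.
    let evaluator = SanitizeUrl::new(Some("https://github.com/rust-lang/test.git"));

    // Text and markdown files are routed through the "blob" view...
    assert_eq!(
        evaluator.evaluate("docs/README.md").as_deref(),
        Some("https://github.com/rust-lang/test/blob/master/docs/README.md")
    );
    // ...while media files are served raw.
    assert_eq!(
        evaluator.evaluate("logo.png").as_deref(),
        Some("https://github.com/rust-lang/test/raw/master/logo.png")
    );
    // Fragment links are always allowed, with or without a base URL.
    assert_eq!(evaluator.evaluate("#usage").as_deref(), Some("#usage"));
}
```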
@@ -286,6 +260,23 @@ pub fn render_and_upload_readme(
     })
 }

+/// Helper function to build a new `HashSet` from the items slice.
+fn hashset<T>(items: &[T]) -> std::collections::HashSet<T>
+where
+    T: Clone + Eq + std::hash::Hash,
+{
+    items.iter().cloned().collect()
+}
+
+/// Helper function to build a new `HashMap` from a slice of key-value pairs.
+fn hashmap<K, V>(items: &[(K, V)]) -> std::collections::HashMap<K, V>
+where
+    K: Clone + Eq + std::hash::Hash,
+    V: Clone,
+{
+    items.iter().cloned().collect()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
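The `hashset` and `hashmap` helpers are plain wrappers around the `.iter().cloned().collect()` chains they replace above; a rough equivalence sketch (illustrative, not part of this commit):

```rust
#[test]
fn helpers_match_the_old_collect_chains() {
    use std::collections::{HashMap, HashSet};

    // Old style: collect a fixed array by hand.
    let old_tags: HashSet<&str> = ["a", "b"].iter().cloned().collect();
    // New style: the helper performs the same collect internally.
    assert_eq!(old_tags, hashset(&["a", "b"]));

    // The same holds for key-value pairs collected into a `HashMap`.
    let old_attrs: HashMap<&str, HashSet<&str>> =
        [("a", hashset(&["href"]))].iter().cloned().collect();
    assert_eq!(old_attrs, hashmap(&[("a", hashset(&["href"]))]));
}
```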