1
- use ammonia:: { Builder , UrlRelative } ;
1
+ //! Render README files to HTML.
2
+
3
+ use ammonia:: { Builder , UrlRelative , UrlRelativeEvaluate } ;
2
4
use htmlescape:: encode_minimal;
3
5
use std:: borrow:: Cow ;
4
6
use std:: path:: Path ;
@@ -83,89 +85,16 @@ impl<'a> MarkdownRenderer<'a> {
83
85
"yaml" ,
84
86
] ) ,
85
87
) ] ) ;
86
-
87
- let sanitizer_base_url = base_url. map ( ToString :: to_string) ;
88
-
89
- // Constrain the type of the closures given to the HTML sanitizer.
90
- fn constrain_closure < F > ( f : F ) -> F
91
- where
92
- F : for < ' a > Fn ( & ' a str ) -> Option < Cow < ' a , str > > + Send + Sync ,
93
- {
94
- f
95
- }
96
-
97
- let unrelative_url_sanitizer = constrain_closure ( |url| {
98
- // We have no base URL; allow fragment links only.
99
- if url. starts_with ( '#' ) {
100
- return Some ( Cow :: Borrowed ( url) ) ;
101
- }
102
-
103
- None
104
- } ) ;
105
-
106
- fn is_media_url ( url : & str ) -> bool {
107
- Path :: new ( url)
108
- . extension ( )
109
- . and_then ( std:: ffi:: OsStr :: to_str)
110
- . map_or ( false , |e| match e {
111
- "png" | "svg" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" => true ,
112
- _ => false ,
113
- } )
114
- }
115
-
116
- let relative_url_sanitizer = constrain_closure ( move |url| {
117
- // sanitizer_base_url is Some(String); use it to fix the relative URL.
118
- if url. starts_with ( '#' ) {
119
- return Some ( Cow :: Borrowed ( url) ) ;
120
- }
121
-
122
- let mut new_url = sanitizer_base_url. clone ( ) . unwrap ( ) ;
123
- if !new_url. ends_with ( '/' ) {
124
- new_url. push ( '/' ) ;
125
- }
126
- if new_url. ends_with ( ".git/" ) {
127
- let offset = new_url. len ( ) - 5 ;
128
- new_url. drain ( offset..offset + 4 ) ;
129
- }
130
- // Assumes GitHub’s URL scheme. GitHub renders text and markdown
131
- // better in the "blob" view, but images need to be served raw.
132
- new_url += if is_media_url ( url) {
133
- "raw/master"
134
- } else {
135
- "blob/master"
136
- } ;
137
- if !url. starts_with ( '/' ) {
138
- new_url. push ( '/' ) ;
139
- }
140
- new_url += url;
141
- Some ( Cow :: Owned ( new_url) )
142
- } ) ;
143
-
144
- let use_relative = if let Some ( base_url) = base_url {
145
- if let Ok ( url) = Url :: parse ( base_url) {
146
- url. host_str ( ) == Some ( "github.com" )
147
- || url. host_str ( ) == Some ( "gitlab.com" )
148
- || url. host_str ( ) == Some ( "bitbucket.org" )
149
- } else {
150
- false
151
- }
152
- } else {
153
- false
154
- } ;
88
+ let sanitize_url = UrlRelative :: Custom ( Box :: new ( SanitizeUrl :: new ( base_url) ) ) ;
155
89
156
90
let mut html_sanitizer = Builder :: new ( ) ;
157
91
html_sanitizer
158
92
. link_rel ( Some ( "nofollow noopener noreferrer" ) )
159
93
. tags ( tags)
160
94
. tag_attributes ( tag_attributes)
161
95
. allowed_classes ( allowed_classes)
162
- . url_relative ( if use_relative {
163
- UrlRelative :: Custom ( Box :: new ( relative_url_sanitizer) )
164
- } else {
165
- UrlRelative :: Custom ( Box :: new ( unrelative_url_sanitizer) )
166
- } )
96
+ . url_relative ( sanitize_url)
167
97
. id_prefix ( Some ( "user-content-" ) ) ;
168
-
169
98
MarkdownRenderer { html_sanitizer }
170
99
}
171
100
@@ -186,6 +115,72 @@ impl<'a> MarkdownRenderer<'a> {
186
115
}
187
116
}
188
117
118
/// Canonicalise a repository base URL.
///
/// The returned string always ends with `/`, and a `.git` suffix sitting
/// directly in front of that final slash is removed
/// (`…/repo.git` and `…/repo.git/` both become `…/repo/`).
fn canon_base_url(mut base_url: String) -> String {
    // Temporarily drop one trailing slash so the suffix check below sees the
    // bare `.git`; the slash is put back unconditionally at the end.
    if base_url.ends_with('/') {
        base_url.pop();
    }
    if base_url.ends_with(".git") {
        let without_suffix = base_url.len() - ".git".len();
        base_url.truncate(without_suffix);
    }
    base_url.push('/');
    base_url
}
129
+
130
+ /// Sanitize relative URLs in README files.
131
+ struct SanitizeUrl {
132
+ base_url : Option < String > ,
133
+ }
134
+
135
+ impl SanitizeUrl {
136
+ fn new ( base_url : Option < & str > ) -> Self {
137
+ let base_url = base_url
138
+ . and_then ( |base_url| Url :: parse ( base_url) . ok ( ) )
139
+ . and_then ( |url| match url. host_str ( ) {
140
+ Some ( "github.com" ) | Some ( "gitlab.com" ) | Some ( "bitbucket.org" ) => {
141
+ Some ( canon_base_url ( url. into_string ( ) ) )
142
+ }
143
+ _ => None ,
144
+ } ) ;
145
+ Self { base_url }
146
+ }
147
+ }
148
+
149
/// Determine whether the given URL has a media file extension.
///
/// Only the path part of `url` is inspected: anything from the first `?` or
/// `#` onwards is ignored, so `demo.webm?raw=true` counts as media while
/// `guide.md#figure.png` does not. The extension is compared
/// ASCII-case-insensitively, so `Logo.PNG` is recognised as well.
///
/// Returns `true` for `png`, `svg`, `jpg`, `jpeg`, `gif`, `mp4`, `webm` and
/// `ogg`; `false` for every other extension or when there is none.
fn is_media_url(url: &str) -> bool {
    // `split` always yields at least one item, so the fallback is never used;
    // it just avoids an `unwrap`.
    let path = url
        .split(|c| c == '?' || c == '#')
        .next()
        .unwrap_or(url);

    Path::new(path)
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .map_or(false, |ext| match ext.to_ascii_lowercase().as_str() {
            "png" | "svg" | "jpg" | "jpeg" | "gif" | "mp4" | "webm" | "ogg" => true,
            _ => false,
        })
}
159
+
160
+ impl UrlRelativeEvaluate for SanitizeUrl {
161
+ fn evaluate < ' a > ( & self , url : & ' a str ) -> Option < Cow < ' a , str > > {
162
+ if url. starts_with ( '#' ) {
163
+ // Always allow fragment URLs.
164
+ return Some ( Cow :: Borrowed ( url) ) ;
165
+ }
166
+ self . base_url . as_ref ( ) . map ( |base_url| {
167
+ let mut new_url = base_url. clone ( ) ;
168
+ // Assumes GitHub’s URL scheme. GitHub renders text and markdown
169
+ // better in the "blob" view, but images need to be served raw.
170
+ new_url += if is_media_url ( url) {
171
+ "raw/master"
172
+ } else {
173
+ "blob/master"
174
+ } ;
175
+ if !url. starts_with ( '/' ) {
176
+ new_url. push ( '/' ) ;
177
+ }
178
+ new_url += url;
179
+ Cow :: Owned ( new_url)
180
+ } )
181
+ }
182
+ }
183
+
189
184
/// Renders Markdown text to sanitized HTML with a given `base_url`.
190
185
/// See `readme_to_html` for the interpretation of `base_url`.
191
186
fn markdown_to_html ( text : & str , base_url : Option < & str > ) -> CargoResult < String > {
0 commit comments