|
| 1 | +//! Detects markdown syntax that's different between pulldown-cmark |
| 2 | +//! 0.9 and 0.10. |
| 3 | +
|
| 4 | +use crate::clean::Item; |
| 5 | +use crate::core::DocContext; |
| 6 | +use crate::html::markdown::main_body_opts; |
| 7 | +use pulldown_cmark as cmarko; |
| 8 | +use pulldown_cmark_new as cmarkn; |
| 9 | +use rustc_resolve::rustdoc::source_span_for_markdown_range; |
| 10 | + |
| 11 | +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { |
| 12 | + let tcx = cx.tcx; |
| 13 | + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { |
| 14 | + // If non-local, no need to check anything. |
| 15 | + return; |
| 16 | + }; |
| 17 | + |
| 18 | + let dox = item.doc_value(); |
| 19 | + if dox.is_empty() { |
| 20 | + return; |
| 21 | + } |
| 22 | + |
| 23 | + let link_names = item.link_names(&cx.cache); |
| 24 | + let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| { |
| 25 | + link_names |
| 26 | + .iter() |
| 27 | + .find(|link| *link.original_text == *broken_link.reference) |
| 28 | + .map(|link| ((*link.href).into(), (*link.new_text).into())) |
| 29 | + }; |
| 30 | + let parser_old = cmarko::Parser::new_with_broken_link_callback( |
| 31 | + &dox, |
| 32 | + main_body_opts(), |
| 33 | + Some(&mut replacer_old), |
| 34 | + ) |
| 35 | + .into_offset_iter() |
| 36 | + // Not worth cleaning up minor "distinctions without difference" in the AST. |
| 37 | + // Text events get chopped up differently between versions. |
| 38 | + // <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints. |
| 39 | + .filter(|(event, _event_range)| { |
| 40 | + !matches!( |
| 41 | + event, |
| 42 | + cmarko::Event::Code(_) |
| 43 | + | cmarko::Event::Text(_) |
| 44 | + | cmarko::Event::Html(_) |
| 45 | + | cmarko::Event::SoftBreak |
| 46 | + ) |
| 47 | + }); |
| 48 | + |
| 49 | + pub fn main_body_opts_new() -> cmarkn::Options { |
| 50 | + cmarkn::Options::ENABLE_TABLES |
| 51 | + | cmarkn::Options::ENABLE_FOOTNOTES |
| 52 | + | cmarkn::Options::ENABLE_STRIKETHROUGH |
| 53 | + | cmarkn::Options::ENABLE_TASKLISTS |
| 54 | + | cmarkn::Options::ENABLE_SMART_PUNCTUATION |
| 55 | + } |
| 56 | + let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| { |
| 57 | + link_names |
| 58 | + .iter() |
| 59 | + .find(|link| *link.original_text.trim() == *broken_link.reference.trim()) |
| 60 | + .map(|link| ((*link.href).into(), (*link.new_text).into())) |
| 61 | + }; |
| 62 | + let parser_new = cmarkn::Parser::new_with_broken_link_callback( |
| 63 | + &dox, |
| 64 | + main_body_opts_new(), |
| 65 | + Some(&mut replacer_new), |
| 66 | + ) |
| 67 | + .into_offset_iter() |
| 68 | + .filter(|(event, _event_range)| { |
| 69 | + !matches!( |
| 70 | + event, |
| 71 | + cmarkn::Event::Code(_) |
| 72 | + | cmarkn::Event::Text(_) |
| 73 | + | cmarkn::Event::Html(_) |
| 74 | + | cmarkn::Event::InlineHtml(_) |
| 75 | + | cmarkn::Event::SoftBreak |
| 76 | + ) |
| 77 | + }); |
| 78 | + |
| 79 | + let mut reported_an_error = false; |
| 80 | + for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) { |
| 81 | + match (event_old, event_new) { |
| 82 | + ( |
| 83 | + cmarko::Event::Start(cmarko::Tag::Emphasis), |
| 84 | + cmarkn::Event::Start(cmarkn::Tag::Emphasis), |
| 85 | + ) |
| 86 | + | ( |
| 87 | + cmarko::Event::Start(cmarko::Tag::Strong), |
| 88 | + cmarkn::Event::Start(cmarkn::Tag::Strong), |
| 89 | + ) |
| 90 | + | ( |
| 91 | + cmarko::Event::Start(cmarko::Tag::Strikethrough), |
| 92 | + cmarkn::Event::Start(cmarkn::Tag::Strikethrough), |
| 93 | + ) |
| 94 | + | ( |
| 95 | + cmarko::Event::End(cmarko::Tag::Emphasis), |
| 96 | + cmarkn::Event::End(cmarkn::TagEnd::Emphasis), |
| 97 | + ) |
| 98 | + | ( |
| 99 | + cmarko::Event::End(cmarko::Tag::Strong), |
| 100 | + cmarkn::Event::End(cmarkn::TagEnd::Strong), |
| 101 | + ) |
| 102 | + | ( |
| 103 | + cmarko::Event::End(cmarko::Tag::Strikethrough), |
| 104 | + cmarkn::Event::End(cmarkn::TagEnd::Strikethrough), |
| 105 | + ) |
| 106 | + | ( |
| 107 | + cmarko::Event::End(cmarko::Tag::Link(..)), |
| 108 | + cmarkn::Event::End(cmarkn::TagEnd::Link), |
| 109 | + ) |
| 110 | + | ( |
| 111 | + cmarko::Event::End(cmarko::Tag::Image(..)), |
| 112 | + cmarkn::Event::End(cmarkn::TagEnd::Image), |
| 113 | + ) |
| 114 | + | (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..)) |
| 115 | + | (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak) |
| 116 | + | (cmarko::Event::HardBreak, cmarkn::Event::HardBreak) |
| 117 | + | (cmarko::Event::TaskListMarker(false), cmarkn::Event::TaskListMarker(false)) |
| 118 | + | (cmarko::Event::TaskListMarker(true), cmarkn::Event::TaskListMarker(true)) |
| 119 | + if event_range_old == event_range_new => |
| 120 | + { |
| 121 | + // Matching tags. Do nothing. |
| 122 | + } |
| 123 | + ( |
| 124 | + cmarko::Event::Start(cmarko::Tag::Link(_, old_dest_url, old_title)), |
| 125 | + cmarkn::Event::Start(cmarkn::Tag::Link { dest_url, title, .. }), |
| 126 | + ) |
| 127 | + | ( |
| 128 | + cmarko::Event::Start(cmarko::Tag::Image(_, old_dest_url, old_title)), |
| 129 | + cmarkn::Event::Start(cmarkn::Tag::Image { dest_url, title, .. }), |
| 130 | + ) if event_range_old == event_range_new |
| 131 | + && &old_dest_url[..] == &dest_url[..] |
| 132 | + && &old_title[..] == &title[..] => |
| 133 | + { |
| 134 | + // Matching tags. Do nothing. |
| 135 | + } |
| 136 | + ( |
| 137 | + cmarko::Event::Start(cmarko::Tag::Paragraph), |
| 138 | + cmarkn::Event::Start(cmarkn::Tag::Paragraph), |
| 139 | + ) |
| 140 | + | ( |
| 141 | + cmarko::Event::Start(cmarko::Tag::Heading(..)), |
| 142 | + cmarkn::Event::Start(cmarkn::Tag::Heading { .. }), |
| 143 | + ) |
| 144 | + | ( |
| 145 | + cmarko::Event::Start(cmarko::Tag::BlockQuote), |
| 146 | + cmarkn::Event::Start(cmarkn::Tag::BlockQuote), |
| 147 | + ) |
| 148 | + | ( |
| 149 | + cmarko::Event::Start(cmarko::Tag::CodeBlock(..)), |
| 150 | + cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..)), |
| 151 | + ) |
| 152 | + | ( |
| 153 | + cmarko::Event::Start(cmarko::Tag::List(..)), |
| 154 | + cmarkn::Event::Start(cmarkn::Tag::List(..)), |
| 155 | + ) |
| 156 | + | (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item)) |
| 157 | + | ( |
| 158 | + cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)), |
| 159 | + cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..)), |
| 160 | + ) |
| 161 | + | ( |
| 162 | + cmarko::Event::Start(cmarko::Tag::Table(..)), |
| 163 | + cmarkn::Event::Start(cmarkn::Tag::Table(..)), |
| 164 | + ) |
| 165 | + | ( |
| 166 | + cmarko::Event::Start(cmarko::Tag::TableHead), |
| 167 | + cmarkn::Event::Start(cmarkn::Tag::TableHead), |
| 168 | + ) |
| 169 | + | ( |
| 170 | + cmarko::Event::Start(cmarko::Tag::TableRow), |
| 171 | + cmarkn::Event::Start(cmarkn::Tag::TableRow), |
| 172 | + ) |
| 173 | + | ( |
| 174 | + cmarko::Event::Start(cmarko::Tag::TableCell), |
| 175 | + cmarkn::Event::Start(cmarkn::Tag::TableCell), |
| 176 | + ) |
| 177 | + | ( |
| 178 | + cmarko::Event::End(cmarko::Tag::Paragraph), |
| 179 | + cmarkn::Event::End(cmarkn::TagEnd::Paragraph), |
| 180 | + ) |
| 181 | + | ( |
| 182 | + cmarko::Event::End(cmarko::Tag::Heading(..)), |
| 183 | + cmarkn::Event::End(cmarkn::TagEnd::Heading(_)), |
| 184 | + ) |
| 185 | + | ( |
| 186 | + cmarko::Event::End(cmarko::Tag::BlockQuote), |
| 187 | + cmarkn::Event::End(cmarkn::TagEnd::BlockQuote), |
| 188 | + ) |
| 189 | + | ( |
| 190 | + cmarko::Event::End(cmarko::Tag::CodeBlock(..)), |
| 191 | + cmarkn::Event::End(cmarkn::TagEnd::CodeBlock), |
| 192 | + ) |
| 193 | + | ( |
| 194 | + cmarko::Event::End(cmarko::Tag::List(..)), |
| 195 | + cmarkn::Event::End(cmarkn::TagEnd::List(_)), |
| 196 | + ) |
| 197 | + | (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item)) |
| 198 | + | ( |
| 199 | + cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)), |
| 200 | + cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition), |
| 201 | + ) |
| 202 | + | ( |
| 203 | + cmarko::Event::End(cmarko::Tag::Table(..)), |
| 204 | + cmarkn::Event::End(cmarkn::TagEnd::Table), |
| 205 | + ) |
| 206 | + | ( |
| 207 | + cmarko::Event::End(cmarko::Tag::TableHead), |
| 208 | + cmarkn::Event::End(cmarkn::TagEnd::TableHead), |
| 209 | + ) |
| 210 | + | ( |
| 211 | + cmarko::Event::End(cmarko::Tag::TableRow), |
| 212 | + cmarkn::Event::End(cmarkn::TagEnd::TableRow), |
| 213 | + ) |
| 214 | + | ( |
| 215 | + cmarko::Event::End(cmarko::Tag::TableCell), |
| 216 | + cmarkn::Event::End(cmarkn::TagEnd::TableCell), |
| 217 | + ) => { |
| 218 | + // Matching tags. Do nothing. |
| 219 | + // |
| 220 | + // Parsers sometimes differ in what they consider the "range of an event," |
| 221 | + // even though the event is really the same. Inlines are pretty consistent, |
| 222 | + // but stuff like list items? Not really. |
| 223 | + // |
| 224 | + // Mismatched block elements will usually nest differently, so ignoring it |
| 225 | + // works good enough. |
| 226 | + } |
| 227 | + // If we've already reported an error on the start tag, don't bother on the end tag. |
| 228 | + (cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {} |
| 229 | + // Non-matching inline. |
| 230 | + (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..)) |
| 231 | + | ( |
| 232 | + cmarko::Event::Start(cmarko::Tag::Image(..)), |
| 233 | + cmarkn::Event::FootnoteReference(..), |
| 234 | + ) |
| 235 | + | ( |
| 236 | + cmarko::Event::FootnoteReference(..), |
| 237 | + cmarkn::Event::Start(cmarkn::Tag::Link { .. }), |
| 238 | + ) |
| 239 | + | ( |
| 240 | + cmarko::Event::FootnoteReference(..), |
| 241 | + cmarkn::Event::Start(cmarkn::Tag::Image { .. }), |
| 242 | + ) if event_range_old == event_range_new => { |
| 243 | + reported_an_error = true; |
| 244 | + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, |
| 245 | + // use the span of the entire attribute as a fallback. |
| 246 | + let span = source_span_for_markdown_range( |
| 247 | + tcx, |
| 248 | + &dox, |
| 249 | + &event_range_old, |
| 250 | + &item.attrs.doc_strings, |
| 251 | + ) |
| 252 | + .unwrap_or_else(|| item.attr_span(tcx)); |
| 253 | + tcx.node_span_lint( |
| 254 | + crate::lint::UNPORTABLE_MARKDOWN, |
| 255 | + hir_id, |
| 256 | + span, |
| 257 | + "unportable markdown", |
| 258 | + |lint| { |
| 259 | + lint.help(format!("syntax ambiguous between footnote and link")); |
| 260 | + }, |
| 261 | + ); |
| 262 | + } |
| 263 | + // Non-matching results. |
| 264 | + (event_old, event_new) => { |
| 265 | + reported_an_error = true; |
| 266 | + let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end |
| 267 | + - event_range_old.start |
| 268 | + < event_range_new.end - event_range_new.start |
| 269 | + { |
| 270 | + ( |
| 271 | + event_range_old, |
| 272 | + event_range_new, |
| 273 | + "old", |
| 274 | + "new", |
| 275 | + format!("{event_old:?}"), |
| 276 | + format!("{event_new:?}"), |
| 277 | + ) |
| 278 | + } else { |
| 279 | + ( |
| 280 | + event_range_new, |
| 281 | + event_range_old, |
| 282 | + "new", |
| 283 | + "old", |
| 284 | + format!("{event_new:?}"), |
| 285 | + format!("{event_old:?}"), |
| 286 | + ) |
| 287 | + }; |
| 288 | + let (range, tag_other) = |
| 289 | + if range_other.start <= range.start && range_other.end <= range.end { |
| 290 | + (range_other.start..range.end, tag_other) |
| 291 | + } else { |
| 292 | + (range, format!("nothing")) |
| 293 | + }; |
| 294 | + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, |
| 295 | + // use the span of the entire attribute as a fallback. |
| 296 | + let span = |
| 297 | + source_span_for_markdown_range(tcx, &dox, &range, &item.attrs.doc_strings) |
| 298 | + .unwrap_or_else(|| item.attr_span(tcx)); |
| 299 | + tcx.node_span_lint( |
| 300 | + crate::lint::UNPORTABLE_MARKDOWN, |
| 301 | + hir_id, |
| 302 | + span, |
| 303 | + "unportable markdown", |
| 304 | + |lint| { |
| 305 | + lint.help(format!( |
| 306 | + "{desc} parser sees {tag}, {desc_other} sees {tag_other}" |
| 307 | + )); |
| 308 | + }, |
| 309 | + ); |
| 310 | + } |
| 311 | + } |
| 312 | + } |
| 313 | +} |
0 commit comments