Skip to content

Commit 0c17cfb

Browse files
committed
rustdoc: check parsing diffs between pulldown-cmark 0.9.6 and 0.10
This commit is not meant to be merged as-is. It's meant to run in Crater, so that we can estimate the impact of bumping to the new version of the markdown parser.
1 parent 829308e commit 0c17cfb

File tree

7 files changed

+428
-0
lines changed

7 files changed

+428
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4704,6 +4704,7 @@ dependencies = [
47044704
"itertools",
47054705
"minifier",
47064706
"once_cell",
4707+
"pulldown-cmark 0.10.0",
47074708
"regex",
47084709
"rustdoc-json-types",
47094710
"serde",

src/librustdoc/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ itertools = "0.11"
1313
indexmap = "2"
1414
minifier = "0.3.0"
1515
once_cell = "1.10.0"
16+
pulldown-cmark-new = { version = "0.10", package = "pulldown-cmark", default-features = false }
1617
regex = "1"
1718
rustdoc-json-types = { path = "../rustdoc-json-types" }
1819
serde_json = "1.0"

src/librustdoc/lint.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ declare_rustdoc_lint! {
196196
"detects redundant explicit links in doc comments"
197197
}
198198

199+
declare_rustdoc_lint! {
200+
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
201+
/// the new one.
202+
UNPORTABLE_MARKDOWN,
203+
Deny,
204+
"detects markdown that is interpreted differently in different parser"
205+
}
206+
199207
pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
200208
vec![
201209
BROKEN_INTRA_DOC_LINKS,
@@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
209217
MISSING_CRATE_LEVEL_DOCS,
210218
UNESCAPED_BACKTICKS,
211219
REDUNDANT_EXPLICIT_LINKS,
220+
UNPORTABLE_MARKDOWN,
212221
]
213222
});
214223

src/librustdoc/passes/lint.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod check_code_block_syntax;
66
mod html_tags;
77
mod redundant_explicit_links;
88
mod unescaped_backticks;
9+
mod unportable_markdown;
910

1011
use super::Pass;
1112
use crate::clean::*;
@@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> {
3132
html_tags::visit_item(self.cx, item);
3233
unescaped_backticks::visit_item(self.cx, item);
3334
redundant_explicit_links::visit_item(self.cx, item);
35+
unportable_markdown::visit_item(self.cx, item);
3436

3537
self.visit_item_recur(item)
3638
}
Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
//! Detects markdown syntax that's different between pulldown-cmark
2+
//! 0.9 and 0.10.
3+
4+
use crate::clean::Item;
5+
use crate::core::DocContext;
6+
use crate::html::markdown::main_body_opts;
7+
use pulldown_cmark as cmarko;
8+
use pulldown_cmark_new as cmarkn;
9+
use rustc_resolve::rustdoc::source_span_for_markdown_range;
10+
11+
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
12+
let tcx = cx.tcx;
13+
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
14+
// If non-local, no need to check anything.
15+
return;
16+
};
17+
18+
let dox = item.doc_value();
19+
if dox.is_empty() {
20+
return;
21+
}
22+
23+
let link_names = item.link_names(&cx.cache);
24+
let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| {
25+
link_names
26+
.iter()
27+
.find(|link| *link.original_text == *broken_link.reference)
28+
.map(|link| ((*link.href).into(), (*link.new_text).into()))
29+
};
30+
let parser_old = cmarko::Parser::new_with_broken_link_callback(
31+
&dox,
32+
main_body_opts(),
33+
Some(&mut replacer_old),
34+
)
35+
.into_offset_iter()
36+
// Not worth cleaning up minor "distinctions without difference" in the AST.
37+
// Text events get chopped up differently between versions.
38+
// <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints.
39+
.filter(|(event, _event_range)| {
40+
!matches!(event, cmarko::Event::Code(_) | cmarko::Event::Text(_) | cmarko::Event::Html(_))
41+
});
42+
43+
pub fn main_body_opts_new() -> cmarkn::Options {
44+
cmarkn::Options::ENABLE_TABLES
45+
| cmarkn::Options::ENABLE_FOOTNOTES
46+
| cmarkn::Options::ENABLE_STRIKETHROUGH
47+
| cmarkn::Options::ENABLE_TASKLISTS
48+
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
49+
}
50+
let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| {
51+
link_names
52+
.iter()
53+
.find(|link| *link.original_text == *broken_link.reference)
54+
.map(|link| ((*link.href).into(), (*link.new_text).into()))
55+
};
56+
let parser_new = cmarkn::Parser::new_with_broken_link_callback(
57+
&dox,
58+
main_body_opts_new(),
59+
Some(&mut replacer_new),
60+
)
61+
.into_offset_iter()
62+
.filter(|(event, _event_range)| {
63+
!matches!(
64+
event,
65+
cmarkn::Event::Code(_)
66+
| cmarkn::Event::Text(_)
67+
| cmarkn::Event::Html(_)
68+
| cmarkn::Event::InlineHtml(_)
69+
)
70+
});
71+
72+
let mut reported_an_error = false;
73+
for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) {
74+
match (event_old, event_new) {
75+
(
76+
cmarko::Event::Start(cmarko::Tag::Emphasis),
77+
cmarkn::Event::Start(cmarkn::Tag::Emphasis),
78+
)
79+
| (
80+
cmarko::Event::Start(cmarko::Tag::Strong),
81+
cmarkn::Event::Start(cmarkn::Tag::Strong),
82+
)
83+
| (
84+
cmarko::Event::Start(cmarko::Tag::Strikethrough),
85+
cmarkn::Event::Start(cmarkn::Tag::Strikethrough),
86+
)
87+
| (
88+
cmarko::Event::Start(cmarko::Tag::Link(..)),
89+
cmarkn::Event::Start(cmarkn::Tag::Link { .. }),
90+
)
91+
| (
92+
cmarko::Event::Start(cmarko::Tag::Image(..)),
93+
cmarkn::Event::Start(cmarkn::Tag::Image { .. }),
94+
)
95+
| (
96+
cmarko::Event::End(cmarko::Tag::Emphasis),
97+
cmarkn::Event::End(cmarkn::TagEnd::Emphasis),
98+
)
99+
| (
100+
cmarko::Event::End(cmarko::Tag::Strong),
101+
cmarkn::Event::End(cmarkn::TagEnd::Strong),
102+
)
103+
| (
104+
cmarko::Event::End(cmarko::Tag::Strikethrough),
105+
cmarkn::Event::End(cmarkn::TagEnd::Strikethrough),
106+
)
107+
| (
108+
cmarko::Event::End(cmarko::Tag::Link(..)),
109+
cmarkn::Event::End(cmarkn::TagEnd::Link),
110+
)
111+
| (
112+
cmarko::Event::End(cmarko::Tag::Image(..)),
113+
cmarkn::Event::End(cmarkn::TagEnd::Image),
114+
)
115+
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..))
116+
| (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak)
117+
| (cmarko::Event::HardBreak, cmarkn::Event::HardBreak)
118+
if event_range_old == event_range_new =>
119+
{
120+
// Matching tags. Do nothing.
121+
}
122+
(
123+
cmarko::Event::Start(cmarko::Tag::Paragraph),
124+
cmarkn::Event::Start(cmarkn::Tag::Paragraph),
125+
)
126+
| (
127+
cmarko::Event::Start(cmarko::Tag::Heading(..)),
128+
cmarkn::Event::Start(cmarkn::Tag::Heading { .. }),
129+
)
130+
| (
131+
cmarko::Event::Start(cmarko::Tag::BlockQuote),
132+
cmarkn::Event::Start(cmarkn::Tag::BlockQuote),
133+
)
134+
| (
135+
cmarko::Event::Start(cmarko::Tag::CodeBlock(..)),
136+
cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..)),
137+
)
138+
| (
139+
cmarko::Event::Start(cmarko::Tag::List(..)),
140+
cmarkn::Event::Start(cmarkn::Tag::List(..)),
141+
)
142+
| (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item))
143+
| (
144+
cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)),
145+
cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..)),
146+
)
147+
| (
148+
cmarko::Event::Start(cmarko::Tag::Table(..)),
149+
cmarkn::Event::Start(cmarkn::Tag::Table(..)),
150+
)
151+
| (
152+
cmarko::Event::Start(cmarko::Tag::TableHead),
153+
cmarkn::Event::Start(cmarkn::Tag::TableHead),
154+
)
155+
| (
156+
cmarko::Event::Start(cmarko::Tag::TableRow),
157+
cmarkn::Event::Start(cmarkn::Tag::TableRow),
158+
)
159+
| (
160+
cmarko::Event::Start(cmarko::Tag::TableCell),
161+
cmarkn::Event::Start(cmarkn::Tag::TableCell),
162+
)
163+
| (
164+
cmarko::Event::End(cmarko::Tag::Paragraph),
165+
cmarkn::Event::End(cmarkn::TagEnd::Paragraph),
166+
)
167+
| (
168+
cmarko::Event::End(cmarko::Tag::Heading(..)),
169+
cmarkn::Event::End(cmarkn::TagEnd::Heading(_)),
170+
)
171+
| (
172+
cmarko::Event::End(cmarko::Tag::BlockQuote),
173+
cmarkn::Event::End(cmarkn::TagEnd::BlockQuote),
174+
)
175+
| (
176+
cmarko::Event::End(cmarko::Tag::CodeBlock(..)),
177+
cmarkn::Event::End(cmarkn::TagEnd::CodeBlock),
178+
)
179+
| (
180+
cmarko::Event::End(cmarko::Tag::List(..)),
181+
cmarkn::Event::End(cmarkn::TagEnd::List(_)),
182+
)
183+
| (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item))
184+
| (
185+
cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)),
186+
cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition),
187+
)
188+
| (
189+
cmarko::Event::End(cmarko::Tag::Table(..)),
190+
cmarkn::Event::End(cmarkn::TagEnd::Table),
191+
)
192+
| (
193+
cmarko::Event::End(cmarko::Tag::TableHead),
194+
cmarkn::Event::End(cmarkn::TagEnd::TableHead),
195+
)
196+
| (
197+
cmarko::Event::End(cmarko::Tag::TableRow),
198+
cmarkn::Event::End(cmarkn::TagEnd::TableRow),
199+
)
200+
| (
201+
cmarko::Event::End(cmarko::Tag::TableCell),
202+
cmarkn::Event::End(cmarkn::TagEnd::TableCell),
203+
) => {
204+
// Matching tags. Do nothing.
205+
//
206+
// Parsers sometimes differ in what they consider the "range of an event,"
207+
// even though the event is really the same. Inlines are pretty consistent,
208+
// but stuff like list items? Not really.
209+
//
210+
// Mismatched block elements will usually nest differently, so ignoring it
211+
// works good enough.
212+
}
213+
// If we've already reported an error on the start tag, don't bother on the end tag.
214+
(cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {}
215+
// Non-matching inline.
216+
(cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..))
217+
| (
218+
cmarko::Event::Start(cmarko::Tag::Image(..)),
219+
cmarkn::Event::FootnoteReference(..),
220+
)
221+
| (
222+
cmarko::Event::FootnoteReference(..),
223+
cmarkn::Event::Start(cmarkn::Tag::Link { .. }),
224+
)
225+
| (
226+
cmarko::Event::FootnoteReference(..),
227+
cmarkn::Event::Start(cmarkn::Tag::Image { .. }),
228+
) if event_range_old == event_range_new => {
229+
reported_an_error = true;
230+
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
231+
// use the span of the entire attribute as a fallback.
232+
let span = source_span_for_markdown_range(
233+
tcx,
234+
&dox,
235+
&event_range_old,
236+
&item.attrs.doc_strings,
237+
)
238+
.unwrap_or_else(|| item.attr_span(tcx));
239+
tcx.node_span_lint(
240+
crate::lint::UNPORTABLE_MARKDOWN,
241+
hir_id,
242+
span,
243+
"unportable markdown",
244+
|lint| {
245+
lint.help(format!("syntax ambiguous between footnote and link"));
246+
},
247+
);
248+
}
249+
// Non-matching results.
250+
(event_old, event_new) => {
251+
reported_an_error = true;
252+
let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end
253+
- event_range_old.start
254+
< event_range_new.end - event_range_new.start
255+
{
256+
(
257+
event_range_old,
258+
event_range_new,
259+
"old",
260+
"new",
261+
format!("{event_old:?}"),
262+
format!("{event_new:?}"),
263+
)
264+
} else {
265+
(
266+
event_range_new,
267+
event_range_old,
268+
"new",
269+
"old",
270+
format!("{event_new:?}"),
271+
format!("{event_old:?}"),
272+
)
273+
};
274+
let (range, tag_other) =
275+
if range_other.start <= range.start && range_other.end <= range.end {
276+
(range_other.start..range.end, tag_other)
277+
} else {
278+
(range, format!("nothing"))
279+
};
280+
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
281+
// use the span of the entire attribute as a fallback.
282+
let span =
283+
source_span_for_markdown_range(tcx, &dox, &range, &item.attrs.doc_strings)
284+
.unwrap_or_else(|| item.attr_span(tcx));
285+
tcx.node_span_lint(
286+
crate::lint::UNPORTABLE_MARKDOWN,
287+
hir_id,
288+
span,
289+
"unportable markdown",
290+
|lint| {
291+
lint.help(format!(
292+
"{desc} parser sees {tag}, {desc_other} sees {tag_other}"
293+
));
294+
},
295+
);
296+
}
297+
}
298+
}
299+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
2+
//
3+
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.10 fixes.
4+
5+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
6+
///
7+
/// Test footnote [^foot].
8+
///
9+
/// [^foot]: This is nested within the footnote now, but didn't used to be.
10+
//~^ ERROR unportable markdown
11+
///
12+
/// This is a multi-paragraph footnote.
13+
pub struct GfmFootnotes;
14+
15+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/750>
16+
///
17+
/// test [^]
18+
//~^ ERROR unportable markdown
19+
///
20+
/// [^]: test2
21+
pub struct FootnoteEmptyName;
22+
23+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
24+
///
25+
/// - _t
26+
/// # test
27+
//~^ ERROR unportable markdown
28+
/// t_
29+
pub struct NestingCornerCase;
30+
31+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
32+
///
33+
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
34+
//~^ ERROR unportable markdown
35+
//~| ERROR unportable markdown
36+
pub struct Emphasis1;
37+
38+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
39+
///
40+
/// |
41+
//~^ ERROR unportable markdown
42+
//~| ERROR unportable markdown
43+
/// |
44+
pub struct NotEnoughTable;

0 commit comments

Comments
 (0)