Skip to content

Commit 6d41729

Browse files
wxiaoguanglunnyzeripath
authored
Fix markdown URL parsing (#17924)
Co-authored-by: Lunny Xiao <[email protected]> Co-authored-by: zeripath <[email protected]>
1 parent 379a524 commit 6d41729

File tree

3 files changed

+127
-75
lines changed

3 files changed

+127
-75
lines changed

modules/markup/html.go

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,21 @@ var (
4141
// While fast, this is also incorrect and lead to false positives.
4242
// TODO: fix invalid linking issue
4343

44+
// valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/]
45+
4446
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
4547
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
46-
// so that abbreviated hash links can be used as well. This matches git and github useability.
48+
// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
4749
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`)
4850

4951
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
5052
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
5153

52-
// anySHA1Pattern allows to split url containing SHA into parts
53-
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
54+
// anySHA1Pattern splits url containing SHA into parts
55+
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
56+
57+
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
58+
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(\.\.\.?)([0-9a-f]{40})?(#[-+~_%.a-zA-Z0-9]+)?`)
5459

5560
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
5661

@@ -65,7 +70,7 @@ var (
6570
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
6671

6772
// EmojiShortCodeRegex find emoji by alias like :smile:
68-
EmojiShortCodeRegex = regexp.MustCompile(`:[\w\+\-]+:`)
73+
EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
6974
)
7075

7176
// CSS class for action keywords (e.g. "closes: #1")
@@ -152,6 +157,7 @@ type processor func(ctx *RenderContext, node *html.Node)
152157

153158
var defaultProcessors = []processor{
154159
fullIssuePatternProcessor,
160+
comparePatternProcessor,
155161
fullSha1PatternProcessor,
156162
shortLinkProcessor,
157163
linkProcessor,
@@ -178,6 +184,7 @@ func PostProcess(
178184

179185
var commitMessageProcessors = []processor{
180186
fullIssuePatternProcessor,
187+
comparePatternProcessor,
181188
fullSha1PatternProcessor,
182189
linkProcessor,
183190
mentionProcessor,
@@ -208,6 +215,7 @@ func RenderCommitMessage(
208215

209216
var commitMessageSubjectProcessors = []processor{
210217
fullIssuePatternProcessor,
218+
comparePatternProcessor,
211219
fullSha1PatternProcessor,
212220
linkProcessor,
213221
mentionProcessor,
@@ -920,12 +928,57 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
920928
if hash != "" {
921929
text += " (" + hash + ")"
922930
}
923-
924931
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
925932
node = node.NextSibling.NextSibling
926933
}
927934
}
928935

936+
func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
937+
if ctx.Metas == nil {
938+
return
939+
}
940+
941+
next := node.NextSibling
942+
for node != nil && node != next {
943+
m := comparePattern.FindStringSubmatchIndex(node.Data)
944+
if m == nil {
945+
return
946+
}
947+
948+
urlFull := node.Data[m[0]:m[1]]
949+
text1 := base.ShortSha(node.Data[m[2]:m[3]])
950+
textDots := base.ShortSha(node.Data[m[4]:m[5]])
951+
text2 := base.ShortSha(node.Data[m[6]:m[7]])
952+
953+
hash := ""
954+
if m[9] > 0 {
955+
hash = node.Data[m[8]:m[9]][1:]
956+
}
957+
958+
start := m[0]
959+
end := m[1]
960+
961+
// If url ends in '.', it's very likely that it is not part of the
962+
// actual url but used to finish a sentence.
963+
if strings.HasSuffix(urlFull, ".") {
964+
end--
965+
urlFull = urlFull[:len(urlFull)-1]
966+
if hash != "" {
967+
hash = hash[:len(hash)-1]
968+
} else if text2 != "" {
969+
text2 = text2[:len(text2)-1]
970+
}
971+
}
972+
973+
text := text1 + textDots + text2
974+
if hash != "" {
975+
text += " (" + hash + ")"
976+
}
977+
replaceContent(node, start, end, createCodeLink(urlFull, text, "compare"))
978+
node = node.NextSibling.NextSibling
979+
}
980+
}
981+
929982
// emojiShortCodeProcessor for rendering text like :smile: into emoji
930983
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
931984
start := 0
@@ -1038,8 +1091,8 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
10381091
continue
10391092
}
10401093

1041-
replaceContent(node, m[2], m[3],
1042-
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
1094+
link := util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash)
1095+
replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit"))
10431096
start = 0
10441097
node = node.NextSibling.NextSibling
10451098
}

modules/markup/html_internal_test.go

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ import (
1515
"github.com/stretchr/testify/assert"
1616
)
1717

18-
const AppURL = "http://localhost:3000/"
19-
const Repo = "gogits/gogs"
20-
const AppSubURL = AppURL + Repo + "/"
18+
const TestAppURL = "http://localhost:3000/"
19+
const TestOrgRepo = "gogits/gogs"
20+
const TestRepoURL = TestAppURL + TestOrgRepo + "/"
2121

2222
// alphanumLink an HTML link to an alphanumeric-style issue
2323
func alphanumIssueLink(baseURL, class, name string) string {
@@ -52,7 +52,7 @@ var alphanumericMetas = map[string]string{
5252
"style": IssueNameStyleAlphanumeric,
5353
}
5454

55-
// these values should match the Repo const above
55+
// these values should match the TestOrgRepo const above
5656
var localMetas = map[string]string{
5757
"user": "gogits",
5858
"repo": "gogs",
@@ -90,8 +90,7 @@ func TestRender_IssueIndexPattern(t *testing.T) {
9090
}
9191

9292
func TestRender_IssueIndexPattern2(t *testing.T) {
93-
setting.AppURL = AppURL
94-
setting.AppSubURL = AppSubURL
93+
setting.AppURL = TestAppURL
9594

9695
// numeric: render inputs with valid mentions
9796
test := func(s, expectedFmt, marker string, indices ...int) {
@@ -108,7 +107,7 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
108107

109108
links := make([]interface{}, len(indices))
110109
for i, index := range indices {
111-
links[i] = numericIssueLink(util.URLJoin(setting.AppSubURL, path), "ref-issue", index, marker)
110+
links[i] = numericIssueLink(util.URLJoin(TestRepoURL, path), "ref-issue", index, marker)
112111
}
113112
expectedNil := fmt.Sprintf(expectedFmt, links...)
114113
testRenderIssueIndexPattern(t, s, expectedNil, &RenderContext{Metas: localMetas})
@@ -152,8 +151,7 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
152151
}
153152

154153
func TestRender_IssueIndexPattern3(t *testing.T) {
155-
setting.AppURL = AppURL
156-
setting.AppSubURL = AppSubURL
154+
setting.AppURL = TestAppURL
157155

158156
// alphanumeric: render inputs without valid mentions
159157
test := func(s string) {
@@ -178,8 +176,7 @@ func TestRender_IssueIndexPattern3(t *testing.T) {
178176
}
179177

180178
func TestRender_IssueIndexPattern4(t *testing.T) {
181-
setting.AppURL = AppURL
182-
setting.AppSubURL = AppSubURL
179+
setting.AppURL = TestAppURL
183180

184181
// alphanumeric: render inputs with valid mentions
185182
test := func(s, expectedFmt string, names ...string) {
@@ -197,7 +194,7 @@ func TestRender_IssueIndexPattern4(t *testing.T) {
197194

198195
func testRenderIssueIndexPattern(t *testing.T, input, expected string, ctx *RenderContext) {
199196
if ctx.URLPrefix == "" {
200-
ctx.URLPrefix = AppSubURL
197+
ctx.URLPrefix = TestAppURL
201198
}
202199

203200
var buf strings.Builder
@@ -207,21 +204,20 @@ func testRenderIssueIndexPattern(t *testing.T, input, expected string, ctx *Rend
207204
}
208205

209206
func TestRender_AutoLink(t *testing.T) {
210-
setting.AppURL = AppURL
211-
setting.AppSubURL = AppSubURL
207+
setting.AppURL = TestAppURL
212208

213209
test := func(input, expected string) {
214210
var buffer strings.Builder
215211
err := PostProcess(&RenderContext{
216-
URLPrefix: setting.AppSubURL,
212+
URLPrefix: TestRepoURL,
217213
Metas: localMetas,
218214
}, strings.NewReader(input), &buffer)
219215
assert.Equal(t, err, nil)
220216
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer.String()))
221217

222218
buffer.Reset()
223219
err = PostProcess(&RenderContext{
224-
URLPrefix: setting.AppSubURL,
220+
URLPrefix: TestRepoURL,
225221
Metas: localMetas,
226222
IsWiki: true,
227223
}, strings.NewReader(input), &buffer)
@@ -230,11 +226,11 @@ func TestRender_AutoLink(t *testing.T) {
230226
}
231227

232228
// render valid issue URLs
233-
test(util.URLJoin(setting.AppSubURL, "issues", "3333"),
234-
numericIssueLink(util.URLJoin(setting.AppSubURL, "issues"), "ref-issue", 3333, "#"))
229+
test(util.URLJoin(TestRepoURL, "issues", "3333"),
230+
numericIssueLink(util.URLJoin(TestRepoURL, "issues"), "ref-issue", 3333, "#"))
235231

236232
// render valid commit URLs
237-
tmp := util.URLJoin(AppSubURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae")
233+
tmp := util.URLJoin(TestRepoURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae")
238234
test(tmp, "<a href=\""+tmp+"\" class=\"commit\"><code class=\"nohighlight\">d8a994ef24</code></a>")
239235
tmp += "#diff-2"
240236
test(tmp, "<a href=\""+tmp+"\" class=\"commit\"><code class=\"nohighlight\">d8a994ef24 (diff-2)</code></a>")
@@ -245,13 +241,12 @@ func TestRender_AutoLink(t *testing.T) {
245241
}
246242

247243
func TestRender_FullIssueURLs(t *testing.T) {
248-
setting.AppURL = AppURL
249-
setting.AppSubURL = AppSubURL
244+
setting.AppURL = TestAppURL
250245

251246
test := func(input, expected string) {
252247
var result strings.Builder
253248
err := postProcess(&RenderContext{
254-
URLPrefix: AppSubURL,
249+
URLPrefix: TestRepoURL,
255250
Metas: localMetas,
256251
}, []processor{fullIssuePatternProcessor}, strings.NewReader(input), &result)
257252
assert.NoError(t, err)

0 commit comments

Comments
 (0)