From 19646015efe70e5decdceb659d5268430d91eff1 Mon Sep 17 00:00:00 2001 From: Mauri de Souza Meneguzzo Date: Thu, 27 Jul 2023 22:25:16 -0300 Subject: [PATCH 1/2] regexp/syntax: accept (?<name>...) as valid capture Currently the only named capture supported by regexp is (?P<name>a). The syntax (?<name>a) is also widely used and there is currently an effort from the Rust regex and RE2 teams to also accept this syntax. Fixes #58458 --- src/regexp/syntax/parse.go | 19 ++++++++++++++----- src/regexp/syntax/parse_test.go | 6 ++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go index accee9ab089edc..a4ccfe3bdbc75d 100644 --- a/src/regexp/syntax/parse.go +++ b/src/regexp/syntax/parse.go @@ -1159,9 +1159,18 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the - // Google source tree, (?P<expr>name) is the dominant form, - // so that's the one we implement. One is enough. - if len(t) > 4 && t[2] == 'P' && t[3] == '<' { + // Google source tree, (?P<expr>name) and (?<expr>name) are the + // dominant forms of named captures and both are supported. + startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<' + startsWithName := len(t) > 3 && t[2] == '<' + + if startsWithP || startsWithName { + // position of expr start + exprStartPos := 4 + if startsWithName { + exprStartPos = 3 + } + // Pull out name. end := strings.IndexRune(t, '>') if end < 0 { @@ -1171,8 +1180,8 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { return "", &Error{ErrInvalidNamedCapture, s} } - capture := t[:end+1] // "(?P<name>" - name := t[4:end] // "name" + capture := t[:end+1] // "(?P<name>" or "(?<name>"
+ name := t[exprStartPos:end] // "name" if err = checkUTF8(name); err != nil { return "", err } diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go index 67e3c5622a0fef..d7999046e0eaaa 100644 --- a/src/regexp/syntax/parse_test.go +++ b/src/regexp/syntax/parse_test.go @@ -160,6 +160,7 @@ var parseTests = []parseTest{ // Test named captures {`(?P<name>a)`, `cap{name:lit{a}}`}, + {`(?<name>a)`, `cap{name:lit{a}}`}, // Case-folded literals {`[Aa]`, `litfold{A}`}, @@ -482,6 +483,11 @@ var invalidRegexps = []string{ `(?P<name`, `(?P<x y>a)`, `(?P<>a)`, + `(?<name>a`, + `(?<name>`, + `(?<name`, + `(?<x y>a)`, + `(?<>a)`, `[a-Z]`, `(?i)[a-Z]`, `\Q\E*`, From 31b50e6ab40cfb0f36df6f570525657d4680017f Mon Sep 17 00:00:00 2001 From: Mauri de Souza Meneguzzo Date: Sat, 29 Jul 2023 17:22:33 -0300 Subject: [PATCH 2/2] add new regex syntax to doc --- src/regexp/syntax/doc.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/regexp/syntax/doc.go b/src/regexp/syntax/doc.go index f6a4b43f7aebb1..eb8a971c734874 100644 --- a/src/regexp/syntax/doc.go +++ b/src/regexp/syntax/doc.go @@ -56,6 +56,7 @@ Grouping: (re) numbered capturing group (submatch) (?P<name>re) named & numbered capturing group (submatch) + (?<name>re) named & numbered capturing group (submatch) (?:re) non-capturing group (?flags) set flags within current group; non-capturing (?flags:re) set flags during re; non-capturing