Skip to content

Commit 60a8dbf

Browse files
mvdangriesemer
authored andcommitted
go/token: add IsIdentifier, IsKeyword, and IsExported
Telling whether a string is a valid Go identifier can seem like an easy task, but it's easy to forget about the edge cases. For example, some implementations out there forget that an empty string or keywords like "func" aren't valid identifiers. Add a simple implementation with proper Unicode support, and start using it in cmd/cover and cmd/doc. Other pieces of the standard library reimplement part of this logic, but don't use a "func(string) bool" signature, so we're leaving them untouched for now. Add some tests too, to ensure that we actually got these edge cases correctly. Since telling whether a string is a valid identifier requires knowing that it's not a valid keyword, add IsKeyword too. The internal map was already accessible via Lookup, but "Lookup(str) != IDENT" isn't as easy to understand as IsKeyword(str). And, as per Josh's suggestion, we could have IsKeyword (and probably Lookup too) use a perfect hash function instead of a global map. Finally, for consistency with these new functions, add IsExported. That makes go/ast.IsExported a bit redundant, so perhaps it can be deprecated in favor of go/token.IsExported in the future. Clarify that token.IsExported doesn't imply token.IsIdentifier, to avoid ambiguity. Fixes #30064. Change-Id: I0e0e49215fd7e47b603ebc2b5a44086c51ba57f7 Reviewed-on: https://go-review.googlesource.com/c/go/+/169018 Run-TryBot: Daniel Martí <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Robert Griesemer <[email protected]> Reviewed-by: Alan Donovan <[email protected]>
1 parent a01d108 commit 60a8dbf

File tree

5 files changed

+78
-46
lines changed

5 files changed

+78
-46
lines changed

src/cmd/cover/cover.go

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"log"
1717
"os"
1818
"sort"
19-
"unicode"
2019

2120
"cmd/internal/edit"
2221
"cmd/internal/objabi"
@@ -117,7 +116,7 @@ func parseFlags() error {
117116
return fmt.Errorf("too many options")
118117
}
119118

120-
if *varVar != "" && !isValidIdentifier(*varVar) {
119+
if *varVar != "" && !token.IsIdentifier(*varVar) {
121120
return fmt.Errorf("-var: %q is not a valid identifier", *varVar)
122121
}
123122

@@ -685,22 +684,6 @@ func (f *File) addVariables(w io.Writer) {
685684
}
686685
}
687686

688-
func isValidIdentifier(ident string) bool {
689-
if len(ident) == 0 {
690-
return false
691-
}
692-
for i, c := range ident {
693-
if i > 0 && unicode.IsDigit(c) {
694-
continue
695-
}
696-
if c == '_' || unicode.IsLetter(c) {
697-
continue
698-
}
699-
return false
700-
}
701-
return true
702-
}
703-
704687
// It is possible for positions to repeat when there is a line
705688
// directive that does not specify column information and the input
706689
// has not been passed through gofmt.

src/cmd/doc/main.go

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
"flag"
4343
"fmt"
4444
"go/build"
45+
"go/token"
4546
"io"
4647
"log"
4748
"os"
@@ -333,28 +334,18 @@ func parseSymbol(str string) (symbol, method string) {
333334
case 1:
334335
case 2:
335336
method = elem[1]
336-
isIdentifier(method)
337+
if !token.IsIdentifier(method) {
338+
log.Fatalf("invalid identifier %q", method)
339+
}
337340
default:
338341
log.Printf("too many periods in symbol specification")
339342
usage()
340343
}
341344
symbol = elem[0]
342-
isIdentifier(symbol)
343-
return
344-
}
345-
346-
// isIdentifier checks that the name is valid Go identifier, and
347-
// logs and exits if it is not.
348-
func isIdentifier(name string) {
349-
if len(name) == 0 {
350-
log.Fatal("empty symbol")
351-
}
352-
for i, ch := range name {
353-
if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
354-
continue
355-
}
356-
log.Fatalf("invalid identifier %q", name)
345+
if !token.IsIdentifier(symbol) {
346+
log.Fatalf("invalid identifier %q", symbol)
357347
}
348+
return
358349
}
359350

360351
// isExported reports whether the name is an exported identifier.

src/go/ast/ast.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ package ast
1010
import (
1111
"go/token"
1212
"strings"
13-
"unicode"
14-
"unicode/utf8"
1513
)
1614

1715
// ----------------------------------------------------------------------------
@@ -523,18 +521,13 @@ func (*ChanType) exprNode() {}
523521
//
524522
func NewIdent(name string) *Ident { return &Ident{token.NoPos, name, nil} }
525523

526-
// IsExported reports whether name is an exported Go symbol
527-
// (that is, whether it begins with an upper-case letter).
524+
// IsExported reports whether name starts with an upper-case letter.
528525
//
529-
func IsExported(name string) bool {
530-
ch, _ := utf8.DecodeRuneInString(name)
531-
return unicode.IsUpper(ch)
532-
}
526+
func IsExported(name string) bool { return token.IsExported(name) }
533527

534-
// IsExported reports whether id is an exported Go symbol
535-
// (that is, whether it begins with an uppercase letter).
528+
// IsExported reports whether id starts with an upper-case letter.
536529
//
537-
func (id *Ident) IsExported() bool { return IsExported(id.Name) }
530+
func (id *Ident) IsExported() bool { return token.IsExported(id.Name) }
538531

539532
func (id *Ident) String() string {
540533
if id != nil {

src/go/token/token.go

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77
//
88
package token
99

10-
import "strconv"
10+
import (
11+
"strconv"
12+
"unicode"
13+
"unicode/utf8"
14+
)
1115

1216
// Token is the set of lexical tokens of the Go programming language.
1317
type Token int
@@ -306,3 +310,31 @@ func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator
306310
// it returns false otherwise.
307311
//
308312
func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
313+
314+
// IsExported reports whether name starts with an upper-case letter.
315+
//
316+
func IsExported(name string) bool {
317+
ch, _ := utf8.DecodeRuneInString(name)
318+
return unicode.IsUpper(ch)
319+
}
320+
321+
// IsKeyword reports whether name is a Go keyword, such as "func" or "return".
322+
//
323+
func IsKeyword(name string) bool {
324+
// TODO: opt: use a perfect hash function instead of a global map.
325+
_, ok := keywords[name]
326+
return ok
327+
}
328+
329+
// IsIdentifier reports whether name is a Go identifier, that is, a non-empty
330+
// string made up of letters, digits, and underscores, where the first character
331+
// is not a digit. Keywords are not identifiers.
332+
//
333+
func IsIdentifier(name string) bool {
334+
for i, c := range name {
335+
if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
336+
return false
337+
}
338+
}
339+
return name != "" && !IsKeyword(name)
340+
}

src/go/token/token_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright 2019 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package token
6+
7+
import "testing"
8+
9+
func TestIsIdentifier(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
in string
13+
want bool
14+
}{
15+
{"Empty", "", false},
16+
{"Space", " ", false},
17+
{"SpaceSuffix", "foo ", false},
18+
{"Number", "123", false},
19+
{"Keyword", "func", false},
20+
21+
{"LettersASCII", "foo", true},
22+
{"MixedASCII", "_bar123", true},
23+
{"UppercaseKeyword", "Func", true},
24+
{"LettersUnicode", "fóö", true},
25+
}
26+
for _, test := range tests {
27+
t.Run(test.name, func(t *testing.T) {
28+
if got := IsIdentifier(test.in); got != test.want {
29+
t.Fatalf("IsIdentifier(%q) = %t, want %v", test.in, got, test.want)
30+
}
31+
})
32+
}
33+
}

0 commit comments

Comments
 (0)