diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go
index b6c4b8fd5693d1..404317b3c03019 100644
--- a/src/cmd/compile/internal/syntax/parser_test.go
+++ b/src/cmd/compile/internal/syntax/parser_test.go
@@ -13,6 +13,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"runtime"
+	"slices"
 	"strings"
 	"sync"
 	"testing"
@@ -38,6 +39,92 @@ func TestVerify(t *testing.T) {
 	verifyPrint(t, *src_, ast)
 }
 
+// To run only this benchmark and obtain results for benchstat:
+// go test -bench=ParseStdLib -benchtime=5s -run none -count=20
+func BenchmarkParseStdLib(b *testing.B) {
+	if testing.Short() {
+		b.Skip("skipping test in short mode")
+	}
+	var skipRx *regexp.Regexp
+	if *skip != "" {
+		var err error
+		skipRx, err = regexp.Compile(*skip)
+		if err != nil {
+			b.Fatalf("invalid argument for -skip (%v)", err)
+		}
+	}
+	// Gather metadata for all candidate files; contents are loaded later, only for the files under test.
+	type file struct {
+		name string
+		base *PosBase
+		data []byte // data populated only for files being tested.
+		size int64
+	}
+	var files []file
+	goroot := testenv.GOROOT(b)
+	dirs := []string{
+		filepath.Join(goroot, "src"),
+		filepath.Join(goroot, "misc"),
+	}
+	for _, dir := range dirs {
+		walkDirs(b, dir, func(filename string) {
+			if skipRx != nil && skipRx.MatchString(filename) {
+				// Always report skipped files since regexp
+				// typos can lead to surprising results.
+				fmt.Printf("skipping %s\n", filename)
+				return
+			}
+			info, err := os.Stat(filename)
+			if err != nil {
+				b.Fatal(err)
+			}
+			files = append(files, file{
+				name: filename,
+				size: info.Size(),
+				base: NewFileBase(filename),
+			})
+		})
+	}
+	const numberOfFiles = 10
+	if len(files) < numberOfFiles*2 {
+		b.Fatal("too few files matched to run")
+	}
+	loadFile := func(f *file) {
+		var err error
+		f.data, err = os.ReadFile(f.name)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+	slices.SortStableFunc(files, func(a, b file) int {
+		return int(a.size - b.size)
+	})
+	// We load the files we'll be testing into memory to avoid noise introduced by the operating system.
+	for i := 0; i < numberOfFiles; i++ {
+		loadFile(&files[i])              // Load smallest files.
+		loadFile(&files[len(files)-i-1]) // Load largest files.
+	}
+	b.ResetTimer()
+	b.Run(fmt.Sprintf("longest %d files", numberOfFiles), func(b *testing.B) {
+		var buf bytes.Reader
+		for i := 0; i < b.N; i++ {
+			for _, file := range files[len(files)-numberOfFiles:] {
+				buf.Reset(file.data)
+				Parse(file.base, &buf, nil, nil, 0)
+			}
+		}
+	})
+	b.Run(fmt.Sprintf("shortest %d files", numberOfFiles), func(b *testing.B) {
+		var buf bytes.Reader
+		for i := 0; i < b.N; i++ {
+			for _, file := range files[:numberOfFiles] {
+				buf.Reset(file.data)
+				Parse(file.base, &buf, nil, nil, 0)
+			}
+		}
+	})
+}
+
 func TestStdLib(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping test in short mode")
@@ -123,7 +210,7 @@ func TestStdLib(t *testing.T) {
 	fmt.Printf("allocated %.3fMb (%.3fMb/s)\n", dm, dm/dt.Seconds())
 }
 
-func walkDirs(t *testing.T, dir string, action func(string)) {
+func walkDirs(t testing.TB, dir string, action func(string)) {
 	entries, err := os.ReadDir(dir)
 	if err != nil {
 		t.Error(err)
diff --git a/src/go/token/token.go b/src/go/token/token.go
index aa5d6e02a6f287..6835fdd2f4ac13 100644
--- a/src/go/token/token.go
+++ b/src/go/token/token.go
@@ -279,19 +279,38 @@ func (op Token) Precedence() int {
 	return LowestPrec
 }
 
-var keywords map[string]Token
+// hash is a perfect hash function for keywords.
+// It assumes that s has at least length 2.
+func hash(s string) uint {
+	// If you get collisions on adding a keyword you'll need to
+	// process more bytes of the identifier since this'll indicate
+	// two keywords share the same first two bytes.
+	// Best course of action is incrementing keyword map size or tuning the hash operations.
+	return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
+}
+
+// keywordMap is a perfect map taken from src/cmd/compile/internal/syntax/scanner.go
+var keywordMap [1 << 6]Token // size must be power of two
 
 func init() {
-	keywords = make(map[string]Token, keyword_end-(keyword_beg+1))
-	for i := keyword_beg + 1; i < keyword_end; i++ {
-		keywords[tokens[i]] = i
+	// populate keywordMap
+	for tok := keyword_beg + 1; tok < keyword_end; tok++ {
+		h := hash(tok.String())
+		if keywordMap[h] != 0 {
+			panic("imperfect hash")
+		}
+		keywordMap[h] = tok
 	}
 }
 
 // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
 func Lookup(ident string) Token {
-	if tok, is_keyword := keywords[ident]; is_keyword {
-		return tok
+	if len(ident) < 2 {
+		return IDENT
+	}
+	maybeMatch := keywordMap[hash(ident)]
+	if maybeMatch != 0 && maybeMatch.String() == ident {
+		return maybeMatch
 	}
 	return IDENT
 }
@@ -319,10 +338,12 @@ func IsExported(name string) bool {
 }
 
 // IsKeyword reports whether name is a Go keyword, such as "func" or "return".
-func IsKeyword(name string) bool {
-	// TODO: opt: use a perfect hash function instead of a global map.
-	_, ok := keywords[name]
-	return ok
+func IsKeyword(name string) bool {
+	if len(name) < 2 {
+		return false
+	}
+	tok := keywordMap[hash(name)]
+	return tok != 0 && tok.String() == name
 }
 
 // IsIdentifier reports whether name is a Go identifier, that is, a non-empty