From 845a0108db85ba4bb29f3bd884826cd27f9307ee Mon Sep 17 00:00:00 2001
From: Patricio Whittingslow <graded.sp@gmail.com>
Date: Fri, 28 Nov 2025 18:37:50 -0300
Subject: [PATCH 1/5] go/token: replace map with array for looking up keywords

Array access has considerably less overhead than map access,
yielding benefits in both lookup performance and package
initialization.
---
 src/go/token/token.go | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/go/token/token.go b/src/go/token/token.go
index aa5d6e02a6f287..6ab4ee655d914a 100644
--- a/src/go/token/token.go
+++ b/src/go/token/token.go
@@ -279,19 +279,35 @@ func (op Token) Precedence() int {
 	return LowestPrec
 }
 
-var keywords map[string]Token
+var keywords [256]Token
 
 func init() {
-	keywords = make(map[string]Token, keyword_end-(keyword_beg+1))
 	for i := keyword_beg + 1; i < keyword_end; i++ {
-		keywords[tokens[i]] = i
+		keywords[keywordsIndex(i.String())] = i
 	}
 }
 
+// keywordsIndex maps an identifier to an index in keywords array.
+func keywordsIndex(maybeKeyword string) uint8 {
+	if len(maybeKeyword) <= 3 {
+		if len(maybeKeyword) == 0 {
+			return 0
+		}
+		return maybeKeyword[0]
+	}
+	v0 := maybeKeyword[0]
+	v1 := maybeKeyword[1]
+	v2 := maybeKeyword[2]
+	v3 := maybeKeyword[3]
+	h := v0 + v1*8 + v2 - v3
+	return h
+}
+
 // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
 func Lookup(ident string) Token {
-	if tok, is_keyword := keywords[ident]; is_keyword {
-		return tok
+	maybeMatch := keywords[keywordsIndex(ident)]
+	if maybeMatch != 0 && maybeMatch.String() == ident {
+		return maybeMatch
 	}
 	return IDENT
 }
@@ -319,10 +335,9 @@ func IsExported(name string) bool {
 }
 
 // IsKeyword reports whether name is a Go keyword, such as "func" or "return".
-func IsKeyword(name string) bool {
-	// TODO: opt: use a perfect hash function instead of a global map.
-	_, ok := keywords[name]
-	return ok
+func IsKeyword(ident string) bool {
+	tok := keywords[keywordsIndex(ident)]
+	return tok != 0 && tok.String() == ident
 }
 
 // IsIdentifier reports whether name is a Go identifier, that is, a non-empty

From 087361512fa461419406e44ab82bd94f26b1817f Mon Sep 17 00:00:00 2001
From: Patricio Whittingslow <graded.sp@gmail.com>
Date: Sun, 30 Nov 2025 18:34:38 -0300
Subject: [PATCH 2/5] add documentation on how hashing works

---
 src/go/token/token.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/go/token/token.go b/src/go/token/token.go
index 6ab4ee655d914a..7a92d623da70ac 100644
--- a/src/go/token/token.go
+++ b/src/go/token/token.go
@@ -290,11 +290,25 @@ func init() {
 // keywordsIndex maps an identifier to an index in keywords array.
 func keywordsIndex(maybeKeyword string) uint8 {
 	if len(maybeKeyword) <= 3 {
+		// If adding a 2 or 3 letter keyword that starts with `i`(if),`f`(for) or `g`(go)
+		// you'd need to add logic to this if statement to differentiate between them.
 		if len(maybeKeyword) == 0 {
 			return 0
 		}
 		return maybeKeyword[0]
 	}
+	// This hash was adjusted by hand. Finding the working combinations
+	// for this hash is quite straightforward, even when restricting all
+	// operations to power-of-two multiplications and addition/subtractions
+	// for performance reasons since multiplication of an integer by a power-of-two
+	// can be optimized to a bitshift which is faster on some architectures.
+	//
+	// Here is a list of hashes that also works for current keyword set:
+	// h = v0 + v1*2 + v2*4 + v3*8
+	// h = v0 + v1*4 + v2*8 + v3
+	// h = v0 + v1*2 + (v2+v3)*2
+	// h = v0*4 + v1*2 + v2*2 + v3*2
+	// h = v0*4 + v1*2 + v2*v3
 	v0 := maybeKeyword[0]
 	v1 := maybeKeyword[1]
 	v2 := maybeKeyword[2]

From dbbeccc0ed76e2f0eda8c1272ce8e1fdafe68f48 Mon Sep 17 00:00:00 2001
From: Patricio Whittingslow <graded.sp@gmail.com>
Date: Wed, 3 Dec 2025 12:32:44 -0300
Subject: [PATCH 3/5] add benchmark to compare keyword lookup strategies

---
 .../compile/internal/syntax/parser_test.go    | 58 ++++++++++++++++++-
 src/cmd/compile/internal/syntax/scanner.go    | 41 ++++++++++++-
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go
index b6c4b8fd5693d1..47c8228d13a050 100644
--- a/src/cmd/compile/internal/syntax/parser_test.go
+++ b/src/cmd/compile/internal/syntax/parser_test.go
@@ -38,6 +38,62 @@ func TestVerify(t *testing.T) {
 	verifyPrint(t, *src_, ast)
 }
 
+func BenchmarkParseStdLib(b *testing.B) {
+	if testing.Short() {
+		b.Skip("skipping test in short mode")
+	}
+	var skipRx *regexp.Regexp
+	if *skip != "" {
+		var err error
+		skipRx, err = regexp.Compile(*skip)
+		if err != nil {
+			b.Fatalf("invalid argument for -skip (%v)", err)
+		}
+	}
+	// Gather all input files before the timed section so that setup cost is ignored.
+	type file struct {
+		name string
+		base *PosBase
+		data []byte
+	}
+	var largestfile *file
+	var files []file
+	goroot := testenv.GOROOT(b)
+	dirs := []string{
+		filepath.Join(goroot, "src"),
+		filepath.Join(goroot, "misc"),
+	}
+	for _, dir := range dirs {
+		walkDirs(b, dir, func(filename string) {
+			if skipRx != nil && skipRx.MatchString(filename) {
+				// Always report skipped files since regexp
+				// typos can lead to surprising results.
+				fmt.Printf("skipping %s\n", filename)
+				return
+			}
+			data, err := os.ReadFile(filename)
+			if err != nil {
+				b.Fatal(err)
+			}
+			files = append(files, file{
+				name: filename,
+				data: data,
+				base: NewFileBase(filename),
+			})
+			f := &files[len(files)-1]
+			if largestfile == nil || len(f.data) > len(largestfile.data) {
+				largestfile = f
+			}
+		})
+	}
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		var buf bytes.Reader
+		buf.Reset(largestfile.data)
+		Parse(largestfile.base, &buf, nil, nil, 0)
+	}
+}
+
 func TestStdLib(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping test in short mode")
@@ -123,7 +179,7 @@ func TestStdLib(t *testing.T) {
 	fmt.Printf("allocated %.3fMb (%.3fMb/s)\n", dm, dm/dt.Seconds())
 }
 
-func walkDirs(t *testing.T, dir string, action func(string)) {
+func walkDirs(t testing.TB, dir string, action func(string)) {
 	entries, err := os.ReadDir(dir)
 	if err != nil {
 		t.Error(err)
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go
index 700908f6bda28a..860f5211710a6c 100644
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -381,7 +381,10 @@ func (s *scanner) ident() {
 	// possibly a keyword
 	lit := s.segment()
 	if len(lit) >= 2 {
-		if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
+		// tok := keywordMap[hash(lit)]
+		// tok := keywords[keywordsIndex(lit)]
+		tok := keywordRuntimeMap[string(lit)]
+		if tok != 0 && tokStrFast(tok) == string(lit) {
 			s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
 			s.tok = tok
 			return
@@ -422,15 +425,47 @@ func hash(s []byte) uint {
 }
 
 var keywordMap [1 << 6]token // size must be power of two
+var keywordRuntimeMap = make(map[string]token)
+var keywords [256]Token
+
+// keywordsIndex maps an identifier to an index in keywords array.
+func keywordsIndex(maybeKeyword []byte) uint8 {
+	if len(maybeKeyword) <= 3 {
+		return maybeKeyword[0]
+	}
+	// This hash was adjusted by hand. Finding the working combinations
+	// for this hash is quite straightforward, even when restricting all
+	// operations to power-of-two multiplications and addition/subtractions
+	// for performance reasons since multiplication of an integer by a power-of-two
+	// can be optimized to a bitshift which is faster on some architectures.
+	//
+	// Here is a list of hashes that also works for current keyword set:
+	// h = v0 + v1*2 + v2*4 + v3*8
+	// h = v0 + v1*4 + v2*8 + v3
+	// h = v0 + v1*2 + (v2+v3)*2
+	// h = v0*4 + v1*2 + v2*2 + v3*2
+	// h = v0*4 + v1*2 + v2*v3
+	v0 := maybeKeyword[0]
+	v1 := maybeKeyword[1]
+	v2 := maybeKeyword[2]
+	v3 := maybeKeyword[3]
+	h := v0 + v1*8 + v2 - v3
+	return h
+}
 
 func init() {
 	// populate keywordMap
 	for tok := _Break; tok <= _Var; tok++ {
-		h := hash([]byte(tok.String()))
-		if keywordMap[h] != 0 {
+		kws := tok.String()
+		kw := []byte(kws)
+		i := keywordsIndex(kw)
+		h := hash(kw)
+		if keywordMap[h] != 0 || keywords[i] != 0 {
 			panic("imperfect hash")
 		}
+		keywords[i] = tok
 		keywordMap[h] = tok
+		keywordRuntimeMap[kws] = tok
 	}
 }
 

From 64baebd035b55c272cdd99e5ed069dd51f0b94d6 Mon Sep 17 00:00:00 2001
From: Patricio Whittingslow <graded.sp@gmail.com>
Date: Wed, 3 Dec 2025 14:33:02 -0300
Subject: [PATCH 4/5] use syntax package hash

---
 .../compile/internal/syntax/parser_test.go    | 35 +++++++++---
 src/cmd/compile/internal/syntax/scanner.go    | 41 +-------------
 src/go/token/token.go                         | 55 +++++++------------
 3 files changed, 50 insertions(+), 81 deletions(-)

diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go
index 47c8228d13a050..cdf87a4fe9ecce 100644
--- a/src/cmd/compile/internal/syntax/parser_test.go
+++ b/src/cmd/compile/internal/syntax/parser_test.go
@@ -13,6 +13,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"runtime"
+	"slices"
 	"strings"
 	"sync"
 	"testing"
@@ -56,7 +57,6 @@ func BenchmarkParseStdLib(b *testing.B) {
 		base *PosBase
 		data []byte
 	}
-	var largestfile *file
 	var files []file
 	goroot := testenv.GOROOT(b)
 	dirs := []string{
@@ -80,18 +80,35 @@ func BenchmarkParseStdLib(b *testing.B) {
 				data: data,
 				base: NewFileBase(filename),
 			})
-			f := &files[len(files)-1]
-			if largestfile == nil || len(f.data) > len(largestfile.data) {
-				largestfile = f
-			}
 		})
 	}
+	slices.SortStableFunc(files, func(a, b file) int {
+		return len(a.data) - len(b.data)
+	})
 	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		var buf bytes.Reader
-		buf.Reset(largestfile.data)
-		Parse(largestfile.base, &buf, nil, nil, 0)
+	const numberOfFiles = 10
+	if len(files) < numberOfFiles*2 {
+		b.Error("too few files matched to run")
 	}
+	b.Run(fmt.Sprintf("longest %d files", numberOfFiles), func(b *testing.B) {
+		var buf bytes.Reader
+		for i := 0; i < b.N; i++ {
+			for _, file := range files[len(files)-numberOfFiles:] {
+				buf.Reset(file.data)
+				Parse(file.base, &buf, nil, nil, 0)
+			}
+		}
+	})
+
+	b.Run(fmt.Sprintf("shortest %d files", numberOfFiles), func(b *testing.B) {
+		var buf bytes.Reader
+		for i := 0; i < b.N; i++ {
+			for _, file := range files[:numberOfFiles] {
+				buf.Reset(file.data)
+				Parse(file.base, &buf, nil, nil, 0)
+			}
+		}
+	})
 }
 
 func TestStdLib(t *testing.T) {
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go
index 860f5211710a6c..700908f6bda28a 100644
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -381,10 +381,7 @@ func (s *scanner) ident() {
 	// possibly a keyword
 	lit := s.segment()
 	if len(lit) >= 2 {
-		// tok := keywordMap[hash(lit)]
-		// tok := keywords[keywordsIndex(lit)]
-		tok := keywordRuntimeMap[string(lit)]
-		if tok != 0 && tokStrFast(tok) == string(lit) {
+		if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
 			s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
 			s.tok = tok
 			return
@@ -425,47 +422,15 @@ func hash(s []byte) uint {
 }
 
 var keywordMap [1 << 6]token // size must be power of two
-var keywordRuntimeMap = make(map[string]token)
-var keywords [256]Token
-
-// keywordsIndex maps an identifier to an index in keywords array.
-func keywordsIndex(maybeKeyword []byte) uint8 {
-	if len(maybeKeyword) <= 3 {
-		return maybeKeyword[0]
-	}
-	// This hash was adjusted by hand. Finding the working combinations
-	// for this hash is quite straightforward, even when restricting all
-	// operations to power-of-two multiplications and addition/subtractions
-	// for performance reasons since multiplication of an integer by a power-of-two
-	// can be optimized to a bitshift which is faster on some architectures.
-	//
-	// Here is a list of hashes that also works for current keyword set:
-	// h = v0 + v1*2 + v2*4 + v3*8
-	// h = v0 + v1*4 + v2*8 + v3
-	// h = v0 + v1*2 + (v2+v3)*2
-	// h = v0*4 + v1*2 + v2*2 + v3*2
-	// h = v0*4 + v1*2 + v2*v3
-	v0 := maybeKeyword[0]
-	v1 := maybeKeyword[1]
-	v2 := maybeKeyword[2]
-	v3 := maybeKeyword[3]
-	h := v0 + v1*8 + v2 - v3
-	return h
-}
 
 func init() {
 	// populate keywordMap
 	for tok := _Break; tok <= _Var; tok++ {
-		kws := tok.String()
-		kw := []byte(kws)
-		i := keywordsIndex(kw)
-		h := hash(kw)
-		if keywordMap[h] != 0 || keywords[i] != 0 {
+		h := hash([]byte(tok.String()))
+		if keywordMap[h] != 0 {
 			panic("imperfect hash")
 		}
-		keywords[i] = tok
 		keywordMap[h] = tok
-		keywordRuntimeMap[kws] = tok
 	}
 }
 
diff --git a/src/go/token/token.go b/src/go/token/token.go
index 7a92d623da70ac..cba0222454e406 100644
--- a/src/go/token/token.go
+++ b/src/go/token/token.go
@@ -279,47 +279,31 @@ func (op Token) Precedence() int {
 	return LowestPrec
 }
 
-var keywords [256]Token
-
-func init() {
-	for i := keyword_beg + 1; i < keyword_end; i++ {
-		keywords[keywordsIndex(i.String())] = i
-	}
+// hash is a perfect hash function for keywords.
+// It assumes that s has at least length 2.
+func hash(s string) uint {
+	return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
 }
 
-// keywordsIndex maps an identifier to an index in keywords array.
-func keywordsIndex(maybeKeyword string) uint8 {
-	if len(maybeKeyword) <= 3 {
-		// If adding a 2 or 3 letter keyword that starts with `i`(if),`f`(for) or `g`(go)
-		// you'd need to add logic to this if statement to differentiate between them.
-		if len(maybeKeyword) == 0 {
-			return 0
+var keywordMap [1 << 6]Token // size must be power of two
+
+func init() {
+	// populate keywordMap
+	for tok := keyword_beg + 1; tok < keyword_end; tok++ {
+		h := hash(tok.String())
+		if keywordMap[h] != 0 {
+			panic("imperfect hash")
 		}
-		return maybeKeyword[0]
+		keywordMap[h] = tok
 	}
-	// This hash was adjusted by hand. Finding the working combinations
-	// for this hash is quite straightforward, even when restricting all
-	// operations to power-of-two multiplications and addition/subtractions
-	// for performance reasons since multiplication of an integer by a power-of-two
-	// can be optimized to a bitshift which is faster on some architectures.
-	//
-	// Here is a list of hashes that also works for current keyword set:
-	// h = v0 + v1*2 + v2*4 + v3*8
-	// h = v0 + v1*4 + v2*8 + v3
-	// h = v0 + v1*2 + (v2+v3)*2
-	// h = v0*4 + v1*2 + v2*2 + v3*2
-	// h = v0*4 + v1*2 + v2*v3
-	v0 := maybeKeyword[0]
-	v1 := maybeKeyword[1]
-	v2 := maybeKeyword[2]
-	v3 := maybeKeyword[3]
-	h := v0 + v1*8 + v2 - v3
-	return h
 }
 
 // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
 func Lookup(ident string) Token {
-	maybeMatch := keywords[keywordsIndex(ident)]
+	if len(ident) < 2 {
+		return IDENT
+	}
+	maybeMatch := keywordMap[hash(ident)]
 	if maybeMatch != 0 && maybeMatch.String() == ident {
 		return maybeMatch
 	}
@@ -350,7 +334,10 @@ func IsExported(name string) bool {
 
 // IsKeyword reports whether name is a Go keyword, such as "func" or "return".
 func IsKeyword(ident string) bool {
-	tok := keywords[keywordsIndex(ident)]
+	if len(ident) < 2 {
+		return false
+	}
+	tok := keywordMap[hash(ident)]
 	return tok != 0 && tok.String() == ident
 }
 

From a9a01b075048f09e5b5fa08ab011be2e329d3b69 Mon Sep 17 00:00:00 2001
From: Patricio Whittingslow <graded.sp@gmail.com>
Date: Fri, 5 Dec 2025 09:48:17 -0300
Subject: [PATCH 5/5] improve stdlib parsing benchmark by not loading all
 stdlib into memory

---
 .../compile/internal/syntax/parser_test.go    | 30 ++++++++++++++-----
 src/go/token/token.go                         |  5 ++++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go
index cdf87a4fe9ecce..404317b3c03019 100644
--- a/src/cmd/compile/internal/syntax/parser_test.go
+++ b/src/cmd/compile/internal/syntax/parser_test.go
@@ -39,6 +39,8 @@ func TestVerify(t *testing.T) {
 	verifyPrint(t, *src_, ast)
 }
 
+// To run only this benchmark and obtain results for benchstat:
+// go test -bench=ParseStdLib -benchtime=5s -run none -count=20
 func BenchmarkParseStdLib(b *testing.B) {
 	if testing.Short() {
 		b.Skip("skipping test in short mode")
@@ -55,7 +57,8 @@ func BenchmarkParseStdLib(b *testing.B) {
 	type file struct {
 		name string
 		base *PosBase
-		data []byte
+		data []byte // data populated only for files being tested.
+		size int64
 	}
 	var files []file
 	goroot := testenv.GOROOT(b)
@@ -71,25 +74,37 @@ func BenchmarkParseStdLib(b *testing.B) {
 				fmt.Printf("skipping %s\n", filename)
 				return
 			}
-			data, err := os.ReadFile(filename)
+			info, err := os.Stat(filename)
 			if err != nil {
 				b.Fatal(err)
 			}
 			files = append(files, file{
 				name: filename,
-				data: data,
+				size: info.Size(),
 				base: NewFileBase(filename),
 			})
 		})
 	}
-	slices.SortStableFunc(files, func(a, b file) int {
-		return len(a.data) - len(b.data)
-	})
-	b.ResetTimer()
 	const numberOfFiles = 10
 	if len(files) < numberOfFiles*2 {
 		b.Error("too few files matched to run")
 	}
+	loadFile := func(f *file) {
+		var err error
+		f.data, err = os.ReadFile(f.name)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+	slices.SortStableFunc(files, func(a, b file) int {
+		return int(a.size - b.size)
+	})
+	// We load the files we'll be testing into memory to avoid noise introduced by the operating system.
+	for i := 0; i < numberOfFiles; i++ {
+		loadFile(&files[i])              // Load smallest files.
+		loadFile(&files[len(files)-i-1]) // Load largest files.
+	}
+	b.ResetTimer()
 	b.Run(fmt.Sprintf("longest %d files", numberOfFiles), func(b *testing.B) {
 		var buf bytes.Reader
 		for i := 0; i < b.N; i++ {
@@ -99,7 +114,6 @@ func BenchmarkParseStdLib(b *testing.B) {
 			}
 		}
 	})
-
 	b.Run(fmt.Sprintf("shortest %d files", numberOfFiles), func(b *testing.B) {
 		var buf bytes.Reader
 		for i := 0; i < b.N; i++ {
diff --git a/src/go/token/token.go b/src/go/token/token.go
index cba0222454e406..6835fdd2f4ac13 100644
--- a/src/go/token/token.go
+++ b/src/go/token/token.go
@@ -282,9 +282,14 @@ func (op Token) Precedence() int {
 // hash is a perfect hash function for keywords.
 // It assumes that s has at least length 2.
 func hash(s string) uint {
+	// The hash only mixes the first two bytes of s with its length.
+	// If adding a keyword causes a collision, first try increasing the
+	// keywordMap size or tuning the hash operations. If the colliding keywords
+	// share both their first two bytes and their length, more bytes must be hashed.
 	return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
 }
 
+// keywordMap is a perfect hash table for keywords, taken from src/cmd/compile/internal/syntax/scanner.go
 var keywordMap [1 << 6]Token // size must be power of two
 
 func init() {