|
| 1 | +package sqlite3 |
| 2 | + |
| 3 | +import ( |
| 4 | + "bufio" |
| 5 | + "io" |
| 6 | + "regexp" |
| 7 | + "strings" |
| 8 | + "unicode" |
| 9 | + "unicode/utf8" |
| 10 | + |
| 11 | + // Namespace imports |
| 12 | + . "github.com/djthorpe/go-errors" |
| 13 | + . "github.com/djthorpe/go-sqlite/pkg/quote" |
| 14 | +) |
| 15 | + |
| 16 | +//////////////////////////////////////////////////////////////////////////////// |
| 17 | +// TYPES |
| 18 | + |
// Tokenizer reads SQL text token by token. It embeds the *bufio.Scanner
// which performs the underlying scan, so the scanner's methods are promoted
// onto the Tokenizer.
type Tokenizer struct {
	*bufio.Scanner
}
| 22 | + |
// Token types. Each one is a string holding the token text exactly as it
// appeared in the input.
type (
	// KeywordToken is a reserved word.
	KeywordToken string

	// TypeToken is the name of a column type.
	TypeToken string

	// NameToken is an identifier: a letter or underscore followed by
	// letters, digits or underscores.
	NameToken string

	// ValueToken is a numeric literal.
	ValueToken string

	// PuncuationToken is any token which is not whitespace, a keyword, a
	// type, a name or a number. The misspelled name is retained so that
	// existing callers keep compiling.
	PuncuationToken string

	// PunctuationToken is the correctly spelled name for PuncuationToken.
	// It is an alias, so both names denote the same type and can be used
	// interchangeably in type switches and assertions.
	PunctuationToken = PuncuationToken

	// WhitespaceToken is a run of whitespace, or the empty string.
	WhitespaceToken string
)
| 31 | + |
| 32 | +//////////////////////////////////////////////////////////////////////////////// |
| 33 | +// GLOBALS |
| 34 | + |
// Patterns used to classify the text of a token. Every pattern is anchored
// at both ends, so it has to match the token text in full.
var (
	// Whitespace only; also matches the empty string
	reWhitespace = regexp.MustCompile(`^\s*$`)

	// A letter or underscore followed by letters, digits or underscores
	reName = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`)

	// Optional sign, then digits with an optional fraction ("12", "12.",
	// "12.5") or a fraction on its own (".5")
	reNumber = regexp.MustCompile(`^[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+)$`)
)
| 40 | + |
| 41 | +//////////////////////////////////////////////////////////////////////////////// |
| 42 | +// LIFECYCLE |
| 43 | + |
| 44 | +func NewTokenizer(v string) *Tokenizer { |
| 45 | + t := &Tokenizer{bufio.NewScanner(strings.NewReader(v))} |
| 46 | + t.Scanner.Split(sqlSplit) |
| 47 | + return t |
| 48 | +} |
| 49 | + |
| 50 | +//////////////////////////////////////////////////////////////////////////////// |
| 51 | +// METHODS |
| 52 | + |
| 53 | +func (t *Tokenizer) Next() (interface{}, error) { |
| 54 | + if t.Scanner.Scan() { |
| 55 | + txt := t.Scanner.Text() |
| 56 | + return toToken(txt), nil |
| 57 | + } |
| 58 | + if t.Scanner.Err() != nil { |
| 59 | + return nil, t.Scanner.Err() |
| 60 | + } else { |
| 61 | + return nil, io.EOF |
| 62 | + } |
| 63 | +} |
| 64 | + |
| 65 | +func toToken(v string) interface{} { |
| 66 | + if reWhitespace.MatchString(v) { |
| 67 | + return WhitespaceToken(v) |
| 68 | + } else if IsReservedWord(v) { |
| 69 | + return KeywordToken(v) |
| 70 | + } else if IsType(v) { |
| 71 | + return TypeToken(v) |
| 72 | + } else if reName.MatchString(v) { |
| 73 | + return NameToken(v) |
| 74 | + } else if reNumber.MatchString(v) { |
| 75 | + return ValueToken(v) |
| 76 | + } else { |
| 77 | + return PuncuationToken(v) |
| 78 | + } |
| 79 | +} |
| 80 | + |
| 81 | +func sqlSplit(data []byte, atEOF bool) (int, []byte, error) { |
| 82 | + advance, token, err := bufio.ScanWords(data, atEOF) |
| 83 | + if err != nil { |
| 84 | + return advance, token, err |
| 85 | + } |
| 86 | + |
| 87 | + // Check first letter for non-letter or non-digit |
| 88 | + r, width := utf8.DecodeRune(data) |
| 89 | + if width == 0 { |
| 90 | + return 0, token, ErrBadParameter.With("Invalid string") |
| 91 | + } |
| 92 | + if !(unicode.IsDigit(r) || unicode.IsLetter(r) || r == '_') { |
| 93 | + return width, []byte(string(r)), nil |
| 94 | + } |
| 95 | + |
| 96 | + // Count until non-letter or non-digit |
| 97 | + for i := width; i < len(data); i += width { |
| 98 | + r, width = utf8.DecodeRune(data[i:]) |
| 99 | + if width == 0 { |
| 100 | + return 0, token, ErrBadParameter.With("Invalid string") |
| 101 | + } |
| 102 | + if !(unicode.IsDigit(r) || unicode.IsLetter(r) || r == '_') { |
| 103 | + return i, data[:i], nil |
| 104 | + } |
| 105 | + } |
| 106 | + |
| 107 | + // Return a word |
| 108 | + return advance, token, nil |
| 109 | +} |
0 commit comments