feat: tokenizer

This commit is contained in:
2025-12-23 14:17:43 -05:00
parent 4845acea73
commit 61bb622dcd
8 changed files with 227 additions and 0 deletions

17
pkg/tokenizer/token.go Normal file
View File

@@ -0,0 +1,17 @@
package tokenizer
// TokenType identifies the lexical category of a Token.
type TokenType int
// Token categories. The values are assigned sequentially via iota, starting
// at zero with TokenOpenParen.
const (
// TokenOpenParen is the type of a '(' token.
TokenOpenParen TokenType = iota
// TokenCloseParen is the type of a ')' token.
TokenCloseParen
// TokenAtom is the type of a run of runes that does not start with
// punctuation or whitespace.
TokenAtom
// TokenSlash is the type of a '\' (backslash) token.
TokenSlash
// TokenDot is the type of a '.' token.
TokenDot
)
// Token is a single lexical unit produced by the tokenizer.
type Token struct {
// Index is the iterator's Index() value captured right after the token's
// first rune was consumed.
// NOTE(review): whether that is the position of the first rune or of the
// rune following it depends on the iterator's semantics — confirm.
Index int
// Type is the lexical category of the token.
Type TokenType
// Value is the text of the token: the single rune for punctuation tokens,
// or the accumulated runes for an atom.
Value string
}

View File

@@ -0,0 +1,93 @@
package tokenizer
import (
"fmt"
"unicode"
"git.maximhutz.com/max/lambda/pkg/iterator"
)
// getToken consumes runes from i and produces at most one token.
//
// It returns:
//   - (nil, nil) when the iterator is already exhausted, or when the rune
//     consumed was whitespace (nothing to emit);
//   - a punctuation token for '(', ')', '.' and '\';
//   - otherwise a TokenAtom made of the consumed rune and every following
//     rune up to (not including) the next whitespace rune or the end of the
//     input. The terminating whitespace rune is consumed.
//
// A non-nil error is returned when the iterator fails to yield a rune.
//
// NOTE(review): only whitespace terminates an atom, so punctuation that
// directly follows an atom (for example the ')' in "x)") is absorbed into
// the atom's value. Fixing that needs a peek/unread operation on the
// iterator, which is not visible from this file.
func getToken(i *iterator.Iterator[rune]) (*Token, error) {
	if i.IsDone() {
		return nil, nil
	}

	letter, err := i.Next()
	if err != nil {
		return nil, fmt.Errorf("Cannot produce next token: %w", err)
	}

	// Single-rune punctuation tokens.
	switch letter {
	case '(':
		return &Token{
			Type:  TokenOpenParen,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	case ')':
		return &Token{
			Type:  TokenCloseParen,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	case '.':
		return &Token{
			Type:  TokenDot,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	case '\\':
		return &Token{
			Type:  TokenSlash,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	}

	// Whitespace produces no token.
	if unicode.IsSpace(letter) {
		return nil, nil
	}

	// Anything else starts an atom. Capture the index before consuming more
	// runes so the token records where the atom began.
	index := i.Index()
	atom := []rune{letter}

	// Running out of input ends the atom just like whitespace does; the atom
	// collected so far must still be emitted rather than dropped.
	for !i.IsDone() {
		pop, err := i.Next()
		if err != nil {
			return nil, err
		}
		if unicode.IsSpace(pop) {
			break
		}
		atom = append(atom, pop)
	}

	return &Token{
		Index: index,
		Type:  TokenAtom,
		Value: string(atom),
	}, nil
}
// GetTokens tokenizes input and returns every token produced together with
// every error encountered along the way.
//
// It wraps input in an iterator and calls getToken until the iterator reports
// that it is done. A call that fails contributes its error; a call that
// yields a token contributes that token; a call that yields neither is
// skipped. Both returned slices are always non-nil, and are empty when there
// is nothing to report.
func GetTokens(input []rune) ([]Token, []error) {
	var (
		it     = iterator.New(input)
		tokens = []Token{}
		errs   = []error{}
	)

	for {
		if it.IsDone() {
			break
		}

		tok, err := getToken(&it)
		switch {
		case err != nil:
			errs = append(errs, err)
		case tok != nil:
			tokens = append(tokens, *tok)
		}
	}

	return tokens, errs
}