feat: tokenizer

2025-12-23 14:17:43 -05:00
parent 4845acea73
commit 61bb622dcd
8 changed files with 227 additions and 0 deletions

28
pkg/cli/arguments.go Normal file

@@ -0,0 +1,28 @@
package cli

import (
	"flag"
	"fmt"
	"log/slog"
)

// CLIOptions holds the options parsed from the command line.
type CLIOptions struct {
	Input string
}

// ParseOptions parses the given command-line arguments (excluding the
// program name) and returns the resulting options.
func ParseOptions(args []string) (*CLIOptions, error) {
	slog.Info("Parsing CLI arguments.", "args", args)
	// Parse flags and positional arguments from the args parameter rather
	// than the global os.Args, so callers control exactly what is parsed.
	flags := flag.NewFlagSet("lambda", flag.ContinueOnError)
	if err := flags.Parse(args); err != nil {
		return nil, fmt.Errorf("parsing arguments: %w", err)
	}
	switch flags.NArg() {
	case 0:
		return nil, fmt.Errorf("no input given")
	case 1:
		return &CLIOptions{Input: flags.Arg(0)}, nil
	default:
		return nil, fmt.Errorf("expected exactly one argument, got %d", flags.NArg())
	}
}

15
pkg/cli/exit.go Normal file

@@ -0,0 +1,15 @@
package cli

import (
	"fmt"
	"os"
)

// HandleError prints err to stderr and exits with a non-zero status.
// A nil error is a no-op.
func HandleError(err error) {
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
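
ParseOptions and HandleError are meant to be wired together from a main package. A minimal sketch of such an entry point, assuming the caller passes os.Args[1:]; the main package itself is not part of this commit:

package main

import (
	"fmt"
	"os"

	"git.maximhutz.com/max/lambda/pkg/cli"
)

func main() {
	// Parse everything after the program name; HandleError exits the
	// process with a message if parsing failed.
	opts, err := cli.ParseOptions(os.Args[1:])
	cli.HandleError(err)
	fmt.Println("input:", opts.Input)
}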

39
pkg/iterator/iterator.go Normal file

@@ -0,0 +1,39 @@
package iterator

import "fmt"

// Iterator walks a slice of T, tracking the current position.
type Iterator[T any] struct {
	data  []T
	index int
}

// New returns an Iterator positioned at the start of items.
func New[T any](items []T) Iterator[T] {
	return Iterator[T]{data: items}
}

// Index reports how many items have been consumed so far.
func (i Iterator[T]) Index() int {
	return i.index
}

// IsDone reports whether all items have been consumed.
func (i Iterator[T]) IsDone() bool {
	return i.index >= len(i.data)
}

// Peek returns the next item without consuming it.
func (i Iterator[T]) Peek() (T, error) {
	var zero T
	if i.IsDone() {
		return zero, fmt.Errorf("iterator is exhausted")
	}
	return i.data[i.index], nil
}

// Next returns the next item and advances the iterator.
func (i *Iterator[T]) Next() (T, error) {
	val, err := i.Peek()
	if err != nil {
		return val, err
	}
	i.index++
	return val, nil
}
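
A quick sketch of the iterator's contract, written as a Go example test in an assumed iterator_test package (not part of this commit): Peek inspects the next item without advancing, Next consumes it, and Index/IsDone report progress.

package iterator_test

import (
	"fmt"

	"git.maximhutz.com/max/lambda/pkg/iterator"
)

func ExampleIterator() {
	it := iterator.New([]rune("ab"))
	first, _ := it.Peek() // 'a', not consumed
	taken, _ := it.Next() // 'a', consumed; Index advances to 1
	fmt.Println(string(first), string(taken), it.Index(), it.IsDone())
	// Output: a a 1 false
}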

17
pkg/tokenizer/token.go Normal file

@@ -0,0 +1,17 @@
package tokenizer

// TokenType identifies the kind of a lexed token.
type TokenType int

const (
	TokenOpenParen TokenType = iota
	TokenCloseParen
	TokenAtom
	TokenSlash
	TokenDot
)

// Token is a single lexeme: its position in the input, its kind, and its text.
type Token struct {
	Index int
	Type  TokenType
	Value string
}
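
For orientation, a rough mapping of input characters to these token kinds, inferred from the tokenizer code below (reading the backslash as the lambda binder is an assumption based on the repository name):

// '('  -> TokenOpenParen
// ')'  -> TokenCloseParen
// '\'  -> TokenSlash (presumably the lambda binder)
// '.'  -> TokenDot
// any other run of non-space, non-delimiter runes -> TokenAtom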


@@ -0,0 +1,93 @@
package tokenizer

import (
	"fmt"
	"unicode"

	"git.maximhutz.com/max/lambda/pkg/iterator"
)

// getToken consumes runes from the iterator and returns the next token.
// It returns (nil, nil) when it consumes whitespace or when the input is
// exhausted, so callers should keep scanning until the iterator is done.
func getToken(i *iterator.Iterator[rune]) (*Token, error) {
	if i.IsDone() {
		return nil, nil
	}
	letter, err := i.Next()
	if err != nil {
		return nil, fmt.Errorf("cannot produce next token: %w", err)
	}
	// Single-character tokens map directly to their token types.
	switch letter {
	case '(':
		return &Token{
			Type:  TokenOpenParen,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	case ')':
		return &Token{
			Type:  TokenCloseParen,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	case '.':
		return &Token{
			Type:  TokenDot,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	case '\\':
		return &Token{
			Type:  TokenSlash,
			Index: i.Index(),
			Value: string(letter),
		}, nil
	}
	// Whitespace yields no token; the caller simply continues scanning.
	if unicode.IsSpace(letter) {
		return nil, nil
	}
	// Otherwise, accumulate an atom until whitespace, a delimiter, or the end
	// of the input. Delimiters are peeked, not consumed, so they are emitted
	// as their own tokens on the next call.
	atom := string(letter)
	index := i.Index()
	for !i.IsDone() {
		next, err := i.Peek()
		if err != nil {
			return nil, err
		}
		if unicode.IsSpace(next) || next == '(' || next == ')' || next == '.' || next == '\\' {
			break
		}
		if _, err := i.Next(); err != nil {
			return nil, err
		}
		atom += string(next)
	}
	return &Token{
		Index: index,
		Type:  TokenAtom,
		Value: atom,
	}, nil
}

// GetTokens lexes the entire input, collecting tokens and any errors that
// occur along the way.
func GetTokens(input []rune) ([]Token, []error) {
	i := iterator.New(input)
	tokens := []Token{}
	errors := []error{}
	for !i.IsDone() {
		token, err := getToken(&i)
		if err != nil {
			errors = append(errors, err)
		} else if token != nil {
			tokens = append(tokens, *token)
		}
	}
	return tokens, errors
}
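
A hedged usage sketch for GetTokens, with an illustrative lambda term as input; the main package here is an assumption and not part of this commit:

package main

import (
	"fmt"

	"git.maximhutz.com/max/lambda/pkg/tokenizer"
)

func main() {
	// Tokenize a small lambda term and report any lexing errors.
	tokens, errs := tokenizer.GetTokens([]rune(`(\x. x) y`))
	for _, tok := range tokens {
		fmt.Printf("%d\t%d\t%q\n", tok.Index, tok.Type, tok.Value)
	}
	for _, err := range errs {
		fmt.Println("error:", err)
	}
}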