From 61bb622dcd6494b9191c79c96c24b2cab59c40e5 Mon Sep 17 00:00:00 2001
From: Max
Date: Tue, 23 Dec 2025 14:17:43 -0500
Subject: [PATCH] feat: tokenizer

---
 Makefile                   |  8 ++++
 cmd/lambda/lambda.go       | 24 +++++++++
 go.mod                     |  3 ++
 pkg/cli/arguments.go       | 34 ++++++++++++
 pkg/cli/exit.go            | 17 ++++++
 pkg/iterator/iterator.go   | 39 ++++++++++++++
 pkg/tokenizer/token.go     | 17 ++++++
 pkg/tokenizer/tokenizer.go | 79 ++++++++++++++++++++++++++++
 8 files changed, 221 insertions(+)
 create mode 100644 Makefile
 create mode 100644 cmd/lambda/lambda.go
 create mode 100644 go.mod
 create mode 100644 pkg/cli/arguments.go
 create mode 100644 pkg/cli/exit.go
 create mode 100644 pkg/iterator/iterator.go
 create mode 100644 pkg/tokenizer/token.go
 create mode 100644 pkg/tokenizer/tokenizer.go

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0fac9f5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+BINARY_NAME=lambda.exe
+
+it:
+	@ go build -o ${BINARY_NAME} ./cmd/lambda
+	@ chmod +x ${BINARY_NAME}
+
+ex1: it
+	@ ./lambda.exe "(\n.\f.\x.(f ((n f) x)) \f.\x.x)"
diff --git a/cmd/lambda/lambda.go b/cmd/lambda/lambda.go
new file mode 100644
index 0000000..46c5e82
--- /dev/null
+++ b/cmd/lambda/lambda.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+	"errors"
+	"log/slog"
+	"os"
+
+	"git.maximhutz.com/max/lambda/pkg/cli"
+	"git.maximhutz.com/max/lambda/pkg/tokenizer"
+)
+
+func main() {
+	slog.Info("Using program arguments.", "args", os.Args)
+	options, err := cli.ParseOptions(os.Args[1:])
+	cli.HandleError(err)
+
+	slog.Info("Parsed CLI options.", "options", options)
+	tokens, fails := tokenizer.GetTokens([]rune(options.Input))
+	if len(fails) > 0 {
+		cli.HandleError(errors.Join(fails...))
+	}
+
+	slog.Info("Parsed tokens.", "tokens", tokens)
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..d3ec3aa
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.maximhutz.com/max/lambda
+
+go 1.25.5
diff --git a/pkg/cli/arguments.go b/pkg/cli/arguments.go
new file mode 100644
index 0000000..a52acff
--- /dev/null
+++ b/pkg/cli/arguments.go
@@ -0,0 +1,34 @@
+package cli
+
+import (
+	"errors"
+	"flag"
+	"log/slog"
+)
+
+// CLIOptions holds the parsed command-line options.
+type CLIOptions struct {
+	Input string
+}
+
+// ParseOptions parses the given argument list (excluding the program
+// name) and returns the resulting options. A dedicated FlagSet is used
+// so the args parameter is actually honored instead of re-reading
+// os.Args through the global flag set.
+func ParseOptions(args []string) (*CLIOptions, error) {
+	slog.Info("Parsing CLI arguments.", "args", args)
+
+	fs := flag.NewFlagSet("lambda", flag.ContinueOnError)
+	if err := fs.Parse(args); err != nil {
+		return nil, err
+	}
+
+	switch fs.NArg() {
+	case 0:
+		return nil, errors.New("no input given")
+	case 1:
+		return &CLIOptions{Input: fs.Arg(0)}, nil
+	default:
+		return nil, errors.New("more than 1 command-line argument")
+	}
+}
diff --git a/pkg/cli/exit.go b/pkg/cli/exit.go
new file mode 100644
index 0000000..6ede677
--- /dev/null
+++ b/pkg/cli/exit.go
@@ -0,0 +1,17 @@
+package cli
+
+import (
+	"fmt"
+	"os"
+)
+
+// HandleError prints err to stderr and exits with status 1.
+// A nil error is a no-op.
+func HandleError(err error) {
+	if err == nil {
+		return
+	}
+
+	fmt.Fprintln(os.Stderr, err)
+	os.Exit(1)
+}
diff --git a/pkg/iterator/iterator.go b/pkg/iterator/iterator.go
new file mode 100644
index 0000000..6e56d7f
--- /dev/null
+++ b/pkg/iterator/iterator.go
@@ -0,0 +1,39 @@
+package iterator
+
+import "errors"
+
+// Iterator walks a slice one element at a time.
+type Iterator[T any] struct {
+	data  []T
+	index int
+}
+
+// New returns an Iterator positioned at the start of items.
+func New[T any](items []T) Iterator[T] {
+	return Iterator[T]{data: items, index: 0}
+}
+
+// Index reports the current position (number of items consumed).
+func (i Iterator[T]) Index() int {
+	return i.index
+}
+
+// IsDone reports whether every item has been consumed.
+func (i Iterator[T]) IsDone() bool {
+	return i.index == len(i.data)
+}
+
+// Peek returns the next item without consuming it.
+func (i Iterator[T]) Peek() (T, error) {
+	var zero T
+	if i.IsDone() {
+		return zero, errors.New("iterator is exhausted")
+	}
+	return i.data[i.index], nil
+}
+
+// Next consumes and returns the next item.
+func (i *Iterator[T]) Next() (T, error) {
+	val, err := i.Peek()
+	if err != nil {
+		return val, err
+	}
+	i.index++
+	return val, nil
+}
diff --git a/pkg/tokenizer/token.go b/pkg/tokenizer/token.go
new file mode 100644
index 0000000..7ab9aa4
--- /dev/null
+++ b/pkg/tokenizer/token.go
@@ -0,0 +1,17 @@
+package tokenizer
+
+type TokenType int
+
+const (
+	TokenOpenParen TokenType = iota
+	TokenCloseParen
+	TokenAtom
+	TokenSlash
+	TokenDot
+)
+
+type Token struct {
+	Index int
+	Type  TokenType
+	Value string
+}
diff --git a/pkg/tokenizer/tokenizer.go b/pkg/tokenizer/tokenizer.go
new file mode 100644
index 0000000..d18e806
--- /dev/null
+++ b/pkg/tokenizer/tokenizer.go
@@ -0,0 +1,79 @@
+package tokenizer
+
+import (
+	"fmt"
+	"unicode"
+
+	"git.maximhutz.com/max/lambda/pkg/iterator"
+)
+
+// operands maps single-rune tokens to their type.
+var operands = map[rune]TokenType{
+	'(':  TokenOpenParen,
+	')':  TokenCloseParen,
+	'.':  TokenDot,
+	'\\': TokenSlash,
+}
+
+// getToken consumes the next token from i. It returns (nil, nil) when
+// the iterator is exhausted or the next rune is whitespace.
+func getToken(i *iterator.Iterator[rune]) (*Token, error) {
+	if i.IsDone() {
+		return nil, nil
+	}
+
+	letter, err := i.Next()
+	if err != nil {
+		return nil, fmt.Errorf("cannot produce next token: %w", err)
+	}
+
+	// Single-rune operand tokens.
+	if kind, ok := operands[letter]; ok {
+		return &Token{Type: kind, Index: i.Index(), Value: string(letter)}, nil
+	}
+
+	// Whitespace separates tokens and produces none itself.
+	if unicode.IsSpace(letter) {
+		return nil, nil
+	}
+
+	// Otherwise accumulate an atom. The atom ends at end of input,
+	// whitespace, or an operand rune; the terminator is not consumed,
+	// so a trailing ')' or a final atom is no longer lost.
+	atom := string(letter)
+	index := i.Index()
+	for {
+		peek, err := i.Peek()
+		if err != nil {
+			break // end of input terminates the atom
+		}
+		if _, ok := operands[peek]; ok || unicode.IsSpace(peek) {
+			break
+		}
+		if _, err := i.Next(); err != nil {
+			return nil, err
+		}
+		atom += string(peek)
+	}
+
+	return &Token{Index: index, Type: TokenAtom, Value: atom}, nil
+}
+
+// GetTokens tokenizes input, returning all tokens plus any errors
+// encountered along the way.
+func GetTokens(input []rune) ([]Token, []error) {
+	i := iterator.New(input)
+	tokens := []Token{}
+	errors := []error{}
+
+	for !i.IsDone() {
+		token, err := getToken(&i)
+		if err != nil {
+			errors = append(errors, err)
+		} else if token != nil {
+			tokens = append(tokens, *token)
+		}
+	}
+
+	return tokens, errors
+}