From a05a63627e4e0bb55e80650fa74906660a6f6224 Mon Sep 17 00:00:00 2001 From: Max Date: Sat, 27 Dec 2025 01:18:06 -0500 Subject: [PATCH] feat: better recursive descent --- .golangci.yml | 4 +- Makefile | 2 +- README.md | 7 +- cmd/lambda/lambda.go | 4 +- pkg/iterator/iterator.go | 64 ++++++------- pkg/saccharine/ast.go | 49 ---------- pkg/saccharine/ast/node.go | 41 ++++++++ pkg/saccharine/ast/visit.go | 22 +++++ pkg/saccharine/parser.go | 170 ++++++++++++++++++++++------------ pkg/saccharine/stringify.go | 41 ++++---- pkg/saccharine/token.go | 27 ------ pkg/saccharine/token/token.go | 47 ++++++++++ pkg/saccharine/tokenizer.go | 59 ++++++++---- 13 files changed, 316 insertions(+), 221 deletions(-) delete mode 100644 pkg/saccharine/ast.go create mode 100644 pkg/saccharine/ast/node.go create mode 100644 pkg/saccharine/ast/visit.go delete mode 100644 pkg/saccharine/token.go create mode 100644 pkg/saccharine/token/token.go diff --git a/.golangci.yml b/.golangci.yml index e856c70..394989a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -162,7 +162,7 @@ linters: - name: increment-decrement # highlights redundant else-blocks that can be eliminated from the code - - name: indent-error-flow + # - name: indent-error-flow # This rule suggests a shorter way of writing ranges that do not use the second value. - name: range @@ -174,7 +174,7 @@ linters: - name: redefines-builtin-id # redundant else-blocks that can be eliminated from the code. 
- - name: superfluous-else + # - name: superfluous-else # prevent confusing name for variables when using `time` package - name: time-naming diff --git a/Makefile b/Makefile index ea3bfed..5add4ca 100644 --- a/Makefile +++ b/Makefile @@ -5,4 +5,4 @@ it: @ chmod +x ${BINARY_NAME} ex: it - @ ./lambda.exe - < ./samples/simple.txt \ No newline at end of file + @ ./lambda.exe -v - < ./samples/simple.txt \ No newline at end of file diff --git a/README.md b/README.md index 45266f0..33e1d9a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ # lambda -Making a lambda calculus interpreter in Go. \ No newline at end of file +Making a lambda calculus interpreter in Go. + +## Things to talk about + +- Exhaustive sum types. +- Recursive descent and left-recursion. diff --git a/cmd/lambda/lambda.go b/cmd/lambda/lambda.go index d4366db..75cd86f 100644 --- a/cmd/lambda/lambda.go +++ b/cmd/lambda/lambda.go @@ -28,9 +28,9 @@ func main() { logger.Info("Parsed tokens.", "tokens", tokens) // Turn tokens into syntax tree. - expression, err := saccharine.GetTree(tokens) + expression, err := saccharine.Parse(tokens) cli.HandleError(err) - logger.Info("Parsed syntax tree.", "tree", saccharine.Stringify(expression)) + logger.Info("Parsed syntax tree.", "tree", expression) // Reduce expression. start := time.Now() diff --git a/pkg/iterator/iterator.go b/pkg/iterator/iterator.go index afdfc0e..2aa5dbb 100644 --- a/pkg/iterator/iterator.go +++ b/pkg/iterator/iterator.go @@ -7,13 +7,13 @@ import "fmt" // An iterator over slices. type Iterator[T any] struct { - data []T + items []T index int } // Create a new iterator, over a set of items. -func New[T any](items []T) *Iterator[T] { - return &Iterator[T]{data: items, index: 0} +func Of[T any](items []T) *Iterator[T] { + return &Iterator[T]{items: items, index: 0} } // Returns the current position of the iterator. 
@@ -21,50 +21,40 @@ func (i Iterator[T]) Index() int { return i.index } -// Returns true if the iterator has no more items to iterate over. -func (i Iterator[T]) IsDone() bool { - return i.index == len(i.data) +func (i Iterator[T]) Copy() *Iterator[T] { + return &Iterator[T]{items: i.items, index: i.index} } -// Gets the next item in the slice, if one exists. Returns an error if there -// isn't one. -func (i Iterator[T]) Peek() (T, error) { - var null T +func (i *Iterator[T]) Sync(o *Iterator[T]) { + i.index = o.index +} - if i.IsDone() { +// Returns the current item without advancing. Fails if the iterator is exhausted. +func (i Iterator[T]) Get() (T, error) { + var null T + if i.Done() { return null, fmt.Errorf("iterator is exhausted") } - return i.data[i.index], nil + return i.items[i.index], nil } -// Moves the iterator pointer to the next item. Returns the current item. Fails -// if there are no more items to iterate over. -func (i *Iterator[T]) Pop() (T, error) { - val, err := i.Peek() - if err != nil { - return val, err +// Returns the current item and advances the iterator. Fails if the iterator is exhausted. +func (i *Iterator[T]) Next() (T, error) { + item, err := i.Get() + if err == nil { + i.index++ } - i.index++ - return val, nil + return item, err } -// Pop until the clause returns false. -func (i *Iterator[T]) PopWhile(fn func(T) bool) []T { - result := []T{} - - for { - popped, err := i.Peek() - if err != nil || !fn(popped) { - break - } - - result = append(result, popped) - if _, err := i.Pop(); err != nil { - break - } - } - - return result +// Moves the iterator back one item. Does nothing if it is already at the start. +func (i *Iterator[T]) Back() { + i.index = max(i.index-1, 0) +} + +// Returns true if the iterator has no more items to iterate over. 
+func (i Iterator[T]) Done() bool { + return i.index == len(i.items) } diff --git a/pkg/saccharine/ast.go b/pkg/saccharine/ast.go deleted file mode 100644 index 17ddd0e..0000000 --- a/pkg/saccharine/ast.go +++ /dev/null @@ -1,49 +0,0 @@ -package saccharine - -type Node interface { - Accept(Visitor) -} - -/** ------------------------------------------------------------------------- */ - -type Abstraction struct { - Parameters []string - Body Node -} - -type Application struct { - Abstraction Node - Arguments []Node -} - -type Variable struct { - Name string -} - -/** ------------------------------------------------------------------------- */ - -func NewAbstraction(parameter []string, body Node) *Abstraction { - return &Abstraction{Parameters: parameter, Body: body} -} - -func NewApplication(abstraction Node, arguments []Node) *Application { - return &Application{Abstraction: abstraction, Arguments: arguments} -} - -func NewVariable(name string) *Variable { - return &Variable{Name: name} -} - -/** ------------------------------------------------------------------------- */ - -func (a *Abstraction) Accept(x Visitor) { x.VisitAbstraction(a) } -func (a *Application) Accept(x Visitor) { x.VisitApplication(a) } -func (v *Variable) Accept(x Visitor) { x.VisitVariable(v) } - -/** ------------------------------------------------------------------------- */ - -type Visitor interface { - VisitAbstraction(*Abstraction) - VisitApplication(*Application) - VisitVariable(*Variable) -} diff --git a/pkg/saccharine/ast/node.go b/pkg/saccharine/ast/node.go new file mode 100644 index 0000000..c86d83d --- /dev/null +++ b/pkg/saccharine/ast/node.go @@ -0,0 +1,41 @@ +package ast + +type Expression interface { + IsExpression() +} + +/** ------------------------------------------------------------------------- */ + +type Abstraction struct { + Parameters []string + Body Expression +} + +type Application struct { + Abstraction Expression + Arguments []Expression +} + +type Atom struct { + 
Name string +} + +/** ------------------------------------------------------------------------- */ + +func NewAbstraction(parameter []string, body Expression) *Abstraction { + return &Abstraction{Parameters: parameter, Body: body} +} + +func NewApplication(abstraction Expression, arguments []Expression) *Application { + return &Application{Abstraction: abstraction, Arguments: arguments} +} + +func NewAtom(name string) *Atom { + return &Atom{Name: name} +} + +/** ------------------------------------------------------------------------- */ + +func (a Abstraction) IsExpression() {} +func (a Application) IsExpression() {} +func (v Atom) IsExpression() {} diff --git a/pkg/saccharine/ast/visit.go b/pkg/saccharine/ast/visit.go new file mode 100644 index 0000000..21911d4 --- /dev/null +++ b/pkg/saccharine/ast/visit.go @@ -0,0 +1,22 @@ +package ast + +import "fmt" + +type Visitor[T any] interface { + VisitAtom(*Atom) T + VisitAbstraction(*Abstraction) T + VisitApplication(*Application) T +} + +func Visit[T any](visitor Visitor[T], node Expression) T { + switch node := node.(type) { + case *Atom: + return visitor.VisitAtom(node) + case *Abstraction: + return visitor.VisitAbstraction(node) + case *Application: + return visitor.VisitApplication(node) + default: + panic(fmt.Sprintf("unknown node %t", node)) + } +} diff --git a/pkg/saccharine/parser.go b/pkg/saccharine/parser.go index 08aa5b5..07ff69c 100644 --- a/pkg/saccharine/parser.go +++ b/pkg/saccharine/parser.go @@ -1,80 +1,132 @@ package saccharine import ( + "errors" "fmt" + "log/slog" "git.maximhutz.com/max/lambda/pkg/iterator" + "git.maximhutz.com/max/lambda/pkg/saccharine/ast" + "git.maximhutz.com/max/lambda/pkg/saccharine/token" ) -func isVariableToken(t Token) bool { - return t.Type == TokenVariable +type TokenIterator = iterator.Iterator[token.Token] + +func parseToken(i *TokenIterator, expected token.Type) (*token.Token, error) { + i2 := i.Copy() + + if tok, err := i2.Next(); err != nil { + return nil, err + } 
else if tok.Type != expected { + return nil, fmt.Errorf("expected token, got %v'", tok.Value) + } else { + i.Sync(i2) + return &tok, nil + } } -func ParseExpression(i *iterator.Iterator[Token]) (Node, error) { - token, err := i.Pop() - if err != nil { - return nil, fmt.Errorf("could not get next token: %w", err) +func parseExpression(i *TokenIterator) (ast.Expression, error) { + slog.Info("attempt exp", "index", i.Index()) + if abs, absErr := parseAbstraction(i); absErr == nil { + slog.Info("got exp") + return abs, nil + } else if atm, atmErr := parseApplication(i); atmErr == nil { + slog.Info("got exp") + return atm, nil + } else if app, appErr := parseAtom(i); appErr == nil { + slog.Info("got exp") + return app, nil + } else { + slog.Info("fail exp") + return nil, errors.Join(absErr, appErr, atmErr) + } +} + +func parseParameters(i *TokenIterator) ([]string, error) { + slog.Info("parse param") + i2 := i.Copy() + variables := []string{} + + for { + if tok, err := parseToken(i2, token.Atom); err != nil { + break + } else { + variables = append(variables, tok.Value) + } } - switch token.Type { - case TokenVariable: - return NewVariable(token.Value), nil - case TokenDot: - return nil, fmt.Errorf("token '.' 
found without a corresponding slash (column %d)", token.Index) - case TokenSlash: - tokens := i.PopWhile(isVariableToken) - variables := []string{} + slog.Info("got exp") + i.Sync(i2) + return variables, nil +} - for _, token := range tokens { - variables = append(variables, token.Value) - } +func parseAbstraction(i *TokenIterator) (*ast.Abstraction, error) { + slog.Info("attempt abs") + i2 := i.Copy() - if dot, dotErr := i.Pop(); dotErr != nil { - return nil, fmt.Errorf("could not find parameter terminator: %w", dotErr) - } else if dot.Type != TokenDot { - return nil, fmt.Errorf("expected '.', got '%v' (column %d)", dot.Value, dot.Index) - } + if _, err := parseToken(i2, token.Slash); err != nil { + slog.Info("fail abs") + return nil, err + } else if parameters, err := parseParameters(i2); err != nil { + slog.Info("fail abs") + return nil, err + } else if _, err = parseToken(i2, token.Dot); err != nil { + slog.Info("fail abs") + return nil, err + } else if body, err := parseExpression(i2); err != nil { + slog.Info("fail abs") + return nil, err + } else { + slog.Info("got abs") + i.Sync(i2) + return ast.NewAbstraction(parameters, body), nil + } +} - body, bodyErr := ParseExpression(i) - if bodyErr != nil { - return nil, fmt.Errorf("could not parse function body: %w", bodyErr) - } +func parseApplication(i *TokenIterator) (*ast.Application, error) { + slog.Info("attempt app") + i2 := i.Copy() + expressions := []ast.Expression{} - return NewAbstraction(variables, body), nil - case TokenOpenParen: - fn, fnErr := ParseExpression(i) - if fnErr != nil { - return nil, fmt.Errorf("could not parse call function: %w", fnErr) - } - - args := []Node{} - - for { - if next, nextErr := i.Peek(); nextErr == nil && next.Type == TokenCloseParen { - break - } - - arg, argErr := ParseExpression(i) - if argErr != nil { - return nil, fmt.Errorf("could not parse call argument: %w", argErr) - } - - args = append(args, arg) - } - - closing, closingErr := i.Pop() - if closingErr != nil { - 
return nil, fmt.Errorf("could not parse call terminating parenthesis: %w", closingErr) - } else if closing.Type != TokenCloseParen { - return nil, fmt.Errorf("expected call terminating parenthesis, got '%v' (column %v)", closing.Value, closing.Index) - } - - return NewApplication(fn, args), nil + if _, err := parseToken(i2, token.OpenParen); err != nil { + slog.Info("fail app") + return nil, err } - return nil, fmt.Errorf("unexpected token '%v' (column %d)", token.Value, token.Index) + for { + if exp, err := parseExpression(i2); err != nil { + break + } else { + expressions = append(expressions, exp) + } + } + + if _, err := parseToken(i2, token.CloseParen); err != nil { + slog.Info("fail app") + return nil, err + } + + if len(expressions) == 0 { + slog.Info("fail app") + return nil, fmt.Errorf("application has no arguments") + } + + slog.Info("got app") + i.Sync(i2) + return ast.NewApplication(expressions[0], expressions[1:]), nil } -func GetTree(tokens []Token) (Node, error) { - return ParseExpression(iterator.New(tokens)) +func parseAtom(i *TokenIterator) (*ast.Atom, error) { + slog.Info("attempt atm") + if tok, err := parseToken(i, token.Atom); err != nil { + slog.Info("fail atm") + return nil, err + } else { + slog.Info("got atm") + return ast.NewAtom(tok.Value), nil + } +} + +func Parse(i *TokenIterator) (ast.Expression, error) { + return parseExpression(i) } diff --git a/pkg/saccharine/stringify.go b/pkg/saccharine/stringify.go index 0a08226..18b9941 100644 --- a/pkg/saccharine/stringify.go +++ b/pkg/saccharine/stringify.go @@ -1,36 +1,31 @@ package saccharine -import "strings" +import ( + "strings" -type stringifyVisitor struct { - builder strings.Builder + "git.maximhutz.com/max/lambda/pkg/saccharine/ast" +) + +type stringifyVisitor struct{} + +func (v stringifyVisitor) VisitAtom(n *ast.Atom) string { + return n.Name } -func (v *stringifyVisitor) VisitVariable(a *Variable) { - v.builder.WriteString(a.Name) +func (v stringifyVisitor) VisitAbstraction(n 
*ast.Abstraction) string { + return "\\" + strings.Join(n.Parameters, " ") + "." + ast.Visit(v, n.Body) } -func (v *stringifyVisitor) VisitAbstraction(f *Abstraction) { - v.builder.WriteRune('\\') - v.builder.WriteString(strings.Join(f.Parameters, " ")) - v.builder.WriteRune('.') - f.Body.Accept(v) -} +func (v stringifyVisitor) VisitApplication(n *ast.Application) string { + arguments := []string{ast.Visit(v, n.Abstraction)} -func (v *stringifyVisitor) VisitApplication(c *Application) { - v.builder.WriteRune('(') - c.Abstraction.Accept(v) - - for _, argument := range c.Arguments { - v.builder.WriteRune(' ') - argument.Accept(v) + for _, argument := range n.Arguments { + arguments = append(arguments, ast.Visit(v, argument)) } - v.builder.WriteRune(')') + return "(" + strings.Join(arguments, " ") + ")" } -func Stringify(n Node) string { - b := &stringifyVisitor{} - n.Accept(b) - return b.builder.String() +func Stringify(n ast.Expression) string { + return ast.Visit(&stringifyVisitor{}, n) } diff --git a/pkg/saccharine/token.go b/pkg/saccharine/token.go deleted file mode 100644 index f21091c..0000000 --- a/pkg/saccharine/token.go +++ /dev/null @@ -1,27 +0,0 @@ -package saccharine - -// All tokens in the pseudo-lambda language. -type TokenType int - -const ( - // Denotes the '(' token. - TokenOpenParen TokenType = iota - // Denotes the ')' token. - TokenCloseParen - // Denotes an alpha-numeric variable. - TokenVariable - // Denotes the '/' token. - TokenSlash - // Denotes the '.' token. - TokenDot -) - -// A representation of a token in source code. -type Token struct { - // Where the token begins in the source text. - Index int - // What type the token is. - Type TokenType - // The value of the token. - Value string -} diff --git a/pkg/saccharine/token/token.go b/pkg/saccharine/token/token.go new file mode 100644 index 0000000..bc0704b --- /dev/null +++ b/pkg/saccharine/token/token.go @@ -0,0 +1,47 @@ +package token + +// All tokens in the pseudo-lambda language. 
+type Type int + +const ( + // Denotes the '(' token. + OpenParen Type = iota + // Denotes the ')' token. + CloseParen + // Denotes an alpha-numeric variable. + Atom + // Denotes the '\' token. + Slash + // Denotes the '.' token. + Dot +) + +// A representation of a token in source code. +type Token struct { + // Where the token begins in the source text. + Index int + // What type the token is. + Type Type + // The value of the token. + Value string +} + +func NewOpenParen(index int) *Token { + return &Token{Type: OpenParen, Index: index, Value: "("} +} + +func NewCloseParen(index int) *Token { + return &Token{Type: CloseParen, Index: index, Value: ")"} +} + +func NewDot(index int) *Token { + return &Token{Type: Dot, Index: index, Value: "."} +} + +func NewSlash(index int) *Token { + return &Token{Type: Slash, Index: index, Value: "\\"} +} + +func NewAtom(name string, index int) *Token { + return &Token{Type: Atom, Index: index, Value: name} +} diff --git a/pkg/saccharine/tokenizer.go b/pkg/saccharine/tokenizer.go index 598f5eb..0013bb5 100644 --- a/pkg/saccharine/tokenizer.go +++ b/pkg/saccharine/tokenizer.go @@ -3,9 +3,11 @@ package saccharine import ( "errors" "fmt" + "log/slog" "unicode" "git.maximhutz.com/max/lambda/pkg/iterator" + "git.maximhutz.com/max/lambda/pkg/saccharine/token" ) // isVariables determines whether a rune can be a valid variable. @@ -13,60 +15,77 @@ func isVariable(r rune) bool { return unicode.IsLetter(r) || unicode.IsNumber(r) } +func parseRune(i *iterator.Iterator[rune], expected func(rune) bool) (rune, error) { + i2 := i.Copy() + + if r, err := i2.Next(); err != nil { + return r, err + } else if !expected(r) { + return r, fmt.Errorf("got unexpected rune %v'", r) + } else { + i.Sync(i2) + return r, nil + } +} + // Pulls the next token from an iterator over runes. If it cannot, it will // return nil. If an error occurs, it will return that. 
-func getToken(i *iterator.Iterator[rune]) (*Token, error) { +func getToken(i *iterator.Iterator[rune]) (*token.Token, error) { index := i.Index() - if i.IsDone() { + if i.Done() { return nil, nil } - letter, err := i.Pop() + letter, err := i.Next() if err != nil { return nil, fmt.Errorf("cannot produce next token: %w", err) } switch { case letter == '(': - // The opening deliminator of an application. - return &Token{Type: TokenOpenParen, Index: index, Value: string(letter)}, nil + return token.NewOpenParen(index), nil case letter == ')': - // The terminator of an application. - return &Token{Type: TokenCloseParen, Index: index, Value: string(letter)}, nil + return token.NewCloseParen(index), nil case letter == '.': - // The terminator of the parameters in an abstraction. - return &Token{Type: TokenDot, Index: index, Value: string(letter)}, nil + return token.NewDot(index), nil case letter == '\\': - // The opening deliminator of an abstraction. - return &Token{Type: TokenSlash, Index: index, Value: string(letter)}, nil + return token.NewSlash(index), nil case unicode.IsSpace(letter): - // If there is a space character, ignore it. return nil, nil case isVariable(letter): - rest := i.PopWhile(isVariable) - atom := string(append([]rune{letter}, rest...)) + atom := []rune{letter} - return &Token{Index: index, Type: TokenVariable, Value: atom}, nil + for { + if r, err := parseRune(i, isVariable); err != nil { + break + } else { + atom = append(atom, r) + } + } + + return token.NewAtom(string(atom), index), nil } return nil, fmt.Errorf("unknown character '%v'", letter) } // Parses a list of runes into tokens. All error encountered are returned, as well. 
-func GetTokens(input []rune) ([]Token, error) { - i := iterator.New(input) - tokens := []Token{} +func GetTokens(input []rune) (*iterator.Iterator[token.Token], error) { + i := iterator.Of(input) + tokens := []token.Token{} errorList := []error{} - for !i.IsDone() { + for !i.Done() { token, err := getToken(i) if err != nil { + slog.Info("token error", "error", err) errorList = append(errorList, err) } else if token != nil { + slog.Info("token parsed", "token", token) tokens = append(tokens, *token) } } - return tokens, errors.Join(errorList...) + return iterator.Of(tokens), errors.Join(errorList...) }