## Description

Both the `saccharine` and `lambda` packages need tokenizing and parsing primitives. This PR extracts shared token infrastructure into a new `pkg/token` package, then wires both languages up to use it.

- Add `pkg/token` with a generic `Token[T]` type, `Scan`, `ScanAtom`, `ScanRune`, `ScanCharacter`, `IsVariable`, `ParseRawToken`, and `ParseList`.
- Refactor `pkg/saccharine` to delegate to `pkg/token`, removing duplicated scanning and parsing helpers.
- Implement `Codec.Decode` for `pkg/lambda` (scanner + parser) using the shared token package.
- Add `iterator.While` for predicate-driven iteration.
- Rename `iterator.Do` to `iterator.Try` to better describe its rollback semantics.

### Decisions

- The `Type` constraint (`comparable` + `Name() string`) keeps the generic token flexible while ensuring every token type can produce readable error messages.
- `iterator.Do` was renamed to `iterator.Try` since it describes a try/rollback operation, not a side-effecting "do".

## Benefits

- Eliminates duplicated token, scanning, and parsing code between languages.
- Enables the `lambda` package to decode (parse) lambda calculus strings, which was previously unimplemented.
- Makes it straightforward to add new languages by reusing `pkg/token` primitives.

## Checklist

- [x] Code follows conventional commit format.
- [x] Branch follows naming convention (`<type>/<description>`). Always use underscores.
- [x] Tests pass (if applicable).
- [ ] Documentation updated (if applicable).

Reviewed-on: #46
Co-authored-by: M.V. Hutz <git@maximhutz.me>
Co-committed-by: M.V. Hutz <git@maximhutz.me>
201 lines
5.5 KiB
Go
201 lines
5.5 KiB
Go
package saccharine
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
|
|
"git.maximhutz.com/max/lambda/pkg/iterator"
|
|
"git.maximhutz.com/max/lambda/pkg/token"
|
|
)
|
|
|
|
// tokenIterator is a local alias for an iterator.Iterator over Token values;
// every parse helper in this file takes a pointer to one.
type tokenIterator = iterator.Iterator[Token]
|
|
|
|
func passSoftBreaks(i *tokenIterator) {
|
|
for {
|
|
if _, err := token.ParseRawToken(i, TokenSoftBreak); err != nil {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func parseToken(i *tokenIterator, expected TokenType, ignoreSoftBreaks bool) (*Token, error) {
|
|
return iterator.Try(i, func(i *tokenIterator) (*Token, error) {
|
|
if ignoreSoftBreaks {
|
|
passSoftBreaks(i)
|
|
}
|
|
|
|
return token.ParseRawToken(i, expected)
|
|
})
|
|
}
|
|
|
|
func parseString(i *tokenIterator) (string, error) {
|
|
if tok, err := parseToken(i, TokenAtom, true); err != nil {
|
|
return "", fmt.Errorf("no variable (col %d): %w", i.Index(), err)
|
|
} else {
|
|
return tok.Value, nil
|
|
}
|
|
}
|
|
|
|
func parseBreak(i *tokenIterator) (*Token, error) {
|
|
if tok, softErr := token.ParseRawToken(i, TokenSoftBreak); softErr == nil {
|
|
return tok, nil
|
|
} else if tok, hardErr := token.ParseRawToken(i, TokenHardBreak); hardErr == nil {
|
|
return tok, nil
|
|
} else {
|
|
return nil, errors.Join(softErr, hardErr)
|
|
}
|
|
}
|
|
|
|
func parseAbstraction(i *tokenIterator) (*Abstraction, error) {
|
|
return iterator.Try(i, func(i *tokenIterator) (*Abstraction, error) {
|
|
if _, err := parseToken(i, TokenSlash, true); err != nil {
|
|
return nil, fmt.Errorf("no function slash (col %d): %w", i.MustGet().Column, err)
|
|
} else if parameters, err := token.ParseList(i, parseString, 0); err != nil {
|
|
return nil, err
|
|
} else if _, err = parseToken(i, TokenDot, true); err != nil {
|
|
return nil, fmt.Errorf("no function dot (col %d): %w", i.MustGet().Column, err)
|
|
} else if body, err := parseExpression(i); err != nil {
|
|
return nil, err
|
|
} else {
|
|
return &Abstraction{Parameters: parameters, Body: body}, nil
|
|
}
|
|
})
|
|
}
|
|
|
|
func parseApplication(i *tokenIterator) (*Application, error) {
|
|
return iterator.Try(i, func(i *tokenIterator) (*Application, error) {
|
|
if _, err := parseToken(i, TokenOpenParen, true); err != nil {
|
|
return nil, fmt.Errorf("no openning brackets (col %d): %w", i.MustGet().Column, err)
|
|
} else if expressions, err := token.ParseList(i, parseExpression, 1); err != nil {
|
|
return nil, err
|
|
} else if _, err := parseToken(i, TokenCloseParen, true); err != nil {
|
|
return nil, fmt.Errorf("no closing brackets (col %d): %w", i.MustGet().Column, err)
|
|
} else {
|
|
return &Application{Abstraction: expressions[0], Arguments: expressions[1:]}, nil
|
|
}
|
|
})
|
|
}
|
|
|
|
func parseAtom(i *tokenIterator) (*Atom, error) {
|
|
if tok, err := parseToken(i, TokenAtom, true); err != nil {
|
|
return nil, fmt.Errorf("no variable (col %d): %w", i.Index(), err)
|
|
} else {
|
|
return &Atom{Name: tok.Value}, nil
|
|
}
|
|
}
|
|
|
|
func parseStatements(i *tokenIterator) ([]Statement, error) {
|
|
statements := []Statement{}
|
|
|
|
//nolint:errcheck
|
|
token.ParseList(i, parseBreak, 0)
|
|
|
|
for {
|
|
if statement, err := parseStatement(i); err != nil {
|
|
break
|
|
} else if _, err := token.ParseList(i, parseBreak, 1); err != nil && !i.Done() {
|
|
break
|
|
} else {
|
|
statements = append(statements, statement)
|
|
}
|
|
}
|
|
|
|
return statements, nil
|
|
}
|
|
|
|
func parseClause(i *tokenIterator, braces bool) (*Clause, error) {
|
|
if braces {
|
|
if _, err := parseToken(i, TokenOpenBrace, true); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
var stmts []Statement
|
|
var last *DeclareStatement
|
|
var err error
|
|
var ok bool
|
|
|
|
if stmts, err = parseStatements(i); err != nil {
|
|
return nil, err
|
|
} else if len(stmts) == 0 {
|
|
return nil, fmt.Errorf("no statements in clause")
|
|
} else if last, ok = stmts[len(stmts)-1].(*DeclareStatement); !ok {
|
|
return nil, fmt.Errorf("this clause contains no final return value (col %d)", i.MustGet().Column)
|
|
}
|
|
|
|
if braces {
|
|
if _, err := parseToken(i, TokenCloseBrace, true); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return &Clause{Statements: stmts[:len(stmts)-1], Returns: last.Value}, nil
|
|
}
|
|
|
|
func parseExpression(i *tokenIterator) (Expression, error) {
|
|
return iterator.Try(i, func(i *tokenIterator) (Expression, error) {
|
|
passSoftBreaks(i)
|
|
|
|
switch peek := i.MustGet(); peek.Type {
|
|
case TokenOpenParen:
|
|
return parseApplication(i)
|
|
case TokenSlash:
|
|
return parseAbstraction(i)
|
|
case TokenAtom:
|
|
return parseAtom(i)
|
|
case TokenOpenBrace:
|
|
return parseClause(i, true)
|
|
default:
|
|
return nil, fmt.Errorf("expected expression, got '%v' (col %d)", peek.Value, peek.Column)
|
|
}
|
|
})
|
|
}
|
|
|
|
func parseLet(i *tokenIterator) (*LetStatement, error) {
|
|
return iterator.Try(i, func(i *tokenIterator) (*LetStatement, error) {
|
|
if parameters, err := token.ParseList(i, parseString, 1); err != nil {
|
|
return nil, err
|
|
} else if _, err := parseToken(i, TokenAssign, true); err != nil {
|
|
return nil, err
|
|
} else if body, err := parseExpression(i); err != nil {
|
|
return nil, err
|
|
} else {
|
|
return &LetStatement{Name: parameters[0], Parameters: parameters[1:], Body: body}, nil
|
|
}
|
|
})
|
|
}
|
|
|
|
func parseDeclare(i *tokenIterator) (*DeclareStatement, error) {
|
|
if value, err := parseExpression(i); err != nil {
|
|
return nil, err
|
|
} else {
|
|
return &DeclareStatement{Value: value}, nil
|
|
}
|
|
}
|
|
|
|
func parseStatement(i *tokenIterator) (Statement, error) {
|
|
if let, letErr := parseLet(i); letErr == nil {
|
|
return let, nil
|
|
} else if declare, declErr := parseDeclare(i); declErr == nil {
|
|
return declare, nil
|
|
} else {
|
|
return nil, errors.Join(letErr, declErr)
|
|
}
|
|
}
|
|
|
|
// Given a list of tokens, attempt to parse it into an syntax tree.
|
|
func parse(tokens []Token) (Expression, error) {
|
|
i := iterator.Of(tokens)
|
|
|
|
exp, err := parseClause(i, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !i.Done() {
|
|
return nil, fmt.Errorf("expected EOF, found more code (col %d)", i.MustGet().Column)
|
|
}
|
|
|
|
return exp, nil
|
|
}
|