Compare commits
3 Commits
main
...
feat/scann
| Author | SHA1 | Date | |
|---|---|---|---|
|
76ea6ea2cb
|
|||
|
3b7cf21eb7
|
|||
|
b3f9f08c62
|
@@ -1,45 +1,18 @@
|
||||
package lambda
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
import "git.maximhutz.com/max/lambda/pkg/token"
|
||||
|
||||
"git.maximhutz.com/max/lambda/pkg/iterator"
|
||||
"git.maximhutz.com/max/lambda/pkg/token"
|
||||
// scanner is the declarative lexer for the lambda calculus.
|
||||
var scanner = token.NewScanner(
|
||||
token.On(`\(`, tokenOpenParen, 0),
|
||||
token.On(`\)`, tokenCloseParen, 0),
|
||||
token.On(`\\`, tokenSlash, 0),
|
||||
token.On(`\.`, tokenDot, 0),
|
||||
token.On(`[a-zA-Z0-9_]+`, tokenAtom, 0),
|
||||
token.Skip[tokenType](`\s+`, 0),
|
||||
)
|
||||
|
||||
// scanToken pulls the next lambda calculus token from a rune iterator.
|
||||
func scanToken(i *iterator.Iterator[rune]) (*lambdaToken, error) {
|
||||
index := i.Index()
|
||||
|
||||
if i.Done() {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
letter, err := i.Next()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot produce next token: %w", err)
|
||||
}
|
||||
|
||||
switch {
|
||||
case letter == '(':
|
||||
return token.New(tokenOpenParen, index), nil
|
||||
case letter == ')':
|
||||
return token.New(tokenCloseParen, index), nil
|
||||
case letter == '\\':
|
||||
return token.New(tokenSlash, index), nil
|
||||
case letter == '.':
|
||||
return token.New(tokenDot, index), nil
|
||||
case unicode.IsSpace(letter):
|
||||
return nil, nil
|
||||
case token.IsVariable(letter):
|
||||
return token.ScanAtom(i, letter, tokenAtom, index), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unknown character '%v'", string(letter))
|
||||
}
|
||||
|
||||
// scan tokenizes an input string into lambda calculus tokens.
|
||||
func scan(input string) ([]lambdaToken, error) {
|
||||
return token.Scan(input, scanToken)
|
||||
return scanner.Scan(input)
|
||||
}
|
||||
|
||||
@@ -1,64 +1,24 @@
|
||||
package saccharine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
import "git.maximhutz.com/max/lambda/pkg/token"
|
||||
|
||||
"git.maximhutz.com/max/lambda/pkg/iterator"
|
||||
"git.maximhutz.com/max/lambda/pkg/token"
|
||||
// scanner is the declarative lexer for the Saccharine language.
|
||||
var scanner = token.NewScanner(
|
||||
token.On(`:=`, TokenAssign, 1),
|
||||
token.On(`\(`, TokenOpenParen, 0),
|
||||
token.On(`\)`, TokenCloseParen, 0),
|
||||
token.On(`\{`, TokenOpenBrace, 0),
|
||||
token.On(`\}`, TokenCloseBrace, 0),
|
||||
token.On(`;`, TokenHardBreak, 0),
|
||||
token.On(`\n`, TokenSoftBreak, 0),
|
||||
token.On(`\\`, TokenSlash, 0),
|
||||
token.On(`\.`, TokenDot, 0),
|
||||
token.On(`[a-zA-Z0-9_]+`, TokenAtom, 0),
|
||||
token.Skip[TokenType](`#[^\n]*`, 0),
|
||||
token.Skip[TokenType](`[^\S\n]+`, 0),
|
||||
)
|
||||
|
||||
// Pulls the next token from an iterator over runes. If it cannot, it will
|
||||
// return nil. If an error occurs, it will return that.
|
||||
func scanToken(i *iterator.Iterator[rune]) (*Token, error) {
|
||||
index := i.Index()
|
||||
|
||||
if i.Done() {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
letter, err := i.Next()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot produce next token: %w", err)
|
||||
}
|
||||
|
||||
switch {
|
||||
case letter == '(':
|
||||
return token.New(TokenOpenParen, index), nil
|
||||
case letter == ')':
|
||||
return token.New(TokenCloseParen, index), nil
|
||||
case letter == '.':
|
||||
return token.New(TokenDot, index), nil
|
||||
case letter == '\\':
|
||||
return token.New(TokenSlash, index), nil
|
||||
case letter == '\n':
|
||||
return token.New(TokenSoftBreak, index), nil
|
||||
case letter == '{':
|
||||
return token.New(TokenOpenBrace, index), nil
|
||||
case letter == '}':
|
||||
return token.New(TokenCloseBrace, index), nil
|
||||
case letter == ':':
|
||||
if _, err := token.ScanCharacter(i, '='); err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
return token.New(TokenAssign, index), nil
|
||||
}
|
||||
case letter == ';':
|
||||
return token.New(TokenHardBreak, index), nil
|
||||
case letter == '#':
|
||||
// Skip everything until the next newline or EOF.
|
||||
i.While(func(r rune) bool { return r != '\n' })
|
||||
return nil, nil
|
||||
case unicode.IsSpace(letter):
|
||||
return nil, nil
|
||||
case token.IsVariable(letter):
|
||||
return token.ScanAtom(i, letter, TokenAtom, index), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unknown character '%v'", string(letter))
|
||||
}
|
||||
|
||||
// scan a string into tokens.
|
||||
// scan tokenizes a string into Saccharine tokens.
|
||||
func scan(input string) ([]Token, error) {
|
||||
return token.Scan(input, scanToken)
|
||||
return scanner.Scan(input)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ func ParseRawToken[T Type](i *iterator.Iterator[Token[T]], expected T) (*Token[T
|
||||
return nil, err
|
||||
}
|
||||
if tok.Type != expected {
|
||||
return nil, fmt.Errorf("expected token %v, got %v'", expected.Name(), tok.Value)
|
||||
return nil, fmt.Errorf("expected token '%v', got '%v'", expected.Name(), tok.Value)
|
||||
}
|
||||
i.Forward()
|
||||
return &tok, nil
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
package token
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"git.maximhutz.com/max/lambda/pkg/iterator"
|
||||
)
|
||||
|
||||
// IsVariable reports whether r may appear inside a variable/atom name:
// a letter, a digit, or an underscore.
func IsVariable(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsNumber(r)
	}
}
|
||||
|
||||
// ScanRune consumes the next rune from the iterator if it satisfies the
|
||||
// predicate.
|
||||
// Returns an error if the iterator is exhausted or the rune does not match.
|
||||
func ScanRune(i *iterator.Iterator[rune], expected func(rune) bool) (rune, error) {
|
||||
r, err := i.Get()
|
||||
if err != nil {
|
||||
return r, err
|
||||
}
|
||||
if !expected(r) {
|
||||
return r, fmt.Errorf("got unexpected rune %v'", r)
|
||||
}
|
||||
i.Forward()
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// ScanCharacter consumes the next rune from the iterator if it matches the
|
||||
// expected rune exactly.
|
||||
// Returns an error if the iterator is exhausted or the rune does not match.
|
||||
func ScanCharacter(i *iterator.Iterator[rune], expected rune) (rune, error) {
|
||||
return ScanRune(i, func(r rune) bool { return r == expected })
|
||||
}
|
||||
|
||||
// ScanAtom scans a contiguous sequence of variable characters into a single
|
||||
// atom token.
|
||||
// The first rune has already been consumed and is passed in.
|
||||
func ScanAtom[T Type](i *iterator.Iterator[rune], first rune, typ T, column int) *Token[T] {
|
||||
atom := []rune{first}
|
||||
|
||||
for {
|
||||
if r, err := ScanRune(i, IsVariable); err != nil {
|
||||
break
|
||||
} else {
|
||||
atom = append(atom, r)
|
||||
}
|
||||
}
|
||||
|
||||
return NewAtom(typ, string(atom), column)
|
||||
}
|
||||
|
||||
// Scan tokenizes an input string using a language-specific scanToken function.
|
||||
// The scanToken function is called repeatedly until the input is exhausted.
|
||||
// It returns nil (no token, no error) for skippable input like whitespace.
|
||||
// Errors are accumulated and returned joined at the end.
|
||||
func Scan[T Type](input string, scanToken func(*iterator.Iterator[rune]) (*Token[T], error)) ([]Token[T], error) {
|
||||
i := iterator.Of([]rune(input))
|
||||
tokens := []Token[T]{}
|
||||
errorList := []error{}
|
||||
|
||||
for !i.Done() {
|
||||
token, err := scanToken(i)
|
||||
if err != nil {
|
||||
errorList = append(errorList, err)
|
||||
} else if token != nil {
|
||||
tokens = append(tokens, *token)
|
||||
}
|
||||
}
|
||||
|
||||
return tokens, errors.Join(errorList...)
|
||||
}
|
||||
129
pkg/token/scanner.go
Normal file
129
pkg/token/scanner.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package token
|
||||
|
||||
import (
	"cmp"
	"errors"
	"fmt"
	"regexp"
	"slices"
	"unicode/utf8"
)
|
||||
|
||||
// A rule describes a single lexical pattern for the scanner.
|
||||
type rule[T Type] struct {
|
||||
pattern *regexp.Regexp
|
||||
typ T
|
||||
precedence int
|
||||
skip bool
|
||||
}
|
||||
|
||||
// compare orders rules by descending precedence.
|
||||
func (r rule[T]) compare(other rule[T]) int {
|
||||
return other.precedence - r.precedence
|
||||
}
|
||||
|
||||
// An Option configures a Scanner during construction.
|
||||
type Option[T Type] func(rules []rule[T]) []rule[T]
|
||||
|
||||
// On returns an option that registers a token-emitting rule.
|
||||
// The token's value is the matched text.
|
||||
// Higher precedence rules are tried first.
|
||||
func On[T Type](pattern string, typ T, precedence int) Option[T] {
|
||||
return func(rules []rule[T]) []rule[T] {
|
||||
return append(rules, rule[T]{
|
||||
pattern: compileAnchored(pattern),
|
||||
typ: typ,
|
||||
precedence: precedence,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Skip returns an option that registers a non-emitting rule.
|
||||
// This is used for whitespace and comments.
|
||||
// Higher precedence rules are tried first.
|
||||
func Skip[T Type](pattern string, precedence int) Option[T] {
|
||||
return func(rules []rule[T]) []rule[T] {
|
||||
return append(rules, rule[T]{
|
||||
pattern: compileAnchored(pattern),
|
||||
precedence: precedence,
|
||||
skip: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// A Scanner is a declarative lexer built from a set of regex rules.
|
||||
// Rules are sorted by precedence (highest first), with registration order as
|
||||
// tiebreaker.
|
||||
// At each position, the first matching rule wins.
|
||||
type Scanner[T Type] struct {
|
||||
rules []rule[T]
|
||||
}
|
||||
|
||||
// NewScanner creates a Scanner by applying the given options and sorting the
|
||||
// resulting rules by precedence.
|
||||
func NewScanner[T Type](opts ...Option[T]) *Scanner[T] {
|
||||
var rules []rule[T]
|
||||
for _, opt := range opts {
|
||||
rules = opt(rules)
|
||||
}
|
||||
|
||||
slices.SortStableFunc(rules, rule[T].compare)
|
||||
|
||||
return &Scanner[T]{rules: rules}
|
||||
}
|
||||
|
||||
// scanOne tries each rule at the current position and returns the first match.
|
||||
// Returns the token (or nil if skipped) and the number of bytes consumed.
|
||||
// Returns 0 if no rule matched.
|
||||
func (s *Scanner[T]) scanOne(input string, pos int) (*Token[T], int) {
|
||||
for _, r := range s.rules {
|
||||
loc := r.pattern.FindStringIndex(input[pos:])
|
||||
if loc == nil || loc[1] == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if r.skip {
|
||||
return nil, loc[1]
|
||||
}
|
||||
|
||||
return &Token[T]{
|
||||
Type: r.typ,
|
||||
Value: input[pos : pos+loc[1]],
|
||||
Column: pos,
|
||||
}, loc[1]
|
||||
}
|
||||
|
||||
return nil, 0
|
||||
}
|
||||
|
||||
// Scan tokenizes the input string using the registered rules.
|
||||
// At each position, rules are tried in precedence order and the first match
|
||||
// wins.
|
||||
// If no rule matches, an error is recorded and the scanner advances one byte.
|
||||
func (s *Scanner[T]) Scan(input string) ([]Token[T], error) {
|
||||
tokens := []Token[T]{}
|
||||
errorList := []error{}
|
||||
|
||||
for pos := 0; pos < len(input); {
|
||||
tok, n := s.scanOne(input, pos)
|
||||
|
||||
if n == 0 {
|
||||
errorList = append(errorList, fmt.Errorf("unknown character '%v'", string(input[pos])))
|
||||
pos++
|
||||
continue
|
||||
}
|
||||
|
||||
if tok != nil {
|
||||
tokens = append(tokens, *tok)
|
||||
}
|
||||
|
||||
pos += n
|
||||
}
|
||||
|
||||
return tokens, errors.Join(errorList...)
|
||||
}
|
||||
|
||||
// compileAnchored compiles pattern so it can only match at the start of its
// input (i.e. at the scanner's current position) by wrapping it in an
// anchored non-capturing group.
// Patterns must not be pre-anchored by the caller.
func compileAnchored(pattern string) *regexp.Regexp {
	const prefix, suffix = `\A(?:`, `)`
	return regexp.MustCompile(prefix + pattern + suffix)
}
|
||||
@@ -18,18 +18,6 @@ type Token[T Type] struct {
|
||||
Value string // The value of the token.
|
||||
}
|
||||
|
||||
// New creates a Token of the given type at the given column.
|
||||
// The token's value is derived from its type's Name method.
|
||||
func New[T Type](typ T, column int) *Token[T] {
|
||||
return &Token[T]{Type: typ, Column: column, Value: typ.Name()}
|
||||
}
|
||||
|
||||
// NewAtom creates a Token of the given type with a custom value at the given
|
||||
// column.
|
||||
func NewAtom[T Type](typ T, name string, column int) *Token[T] {
|
||||
return &Token[T]{Type: typ, Column: column, Value: name}
|
||||
}
|
||||
|
||||
// Name returns the type of the Token, as a string.
|
||||
func (t Token[T]) Name() string {
|
||||
return t.Type.Name()
|
||||
|
||||
Reference in New Issue
Block a user