From 76ea6ea2cb4250f4aa412124e80d6f8347b3aef0 Mon Sep 17 00:00:00 2001 From: "M.V. Hutz" Date: Wed, 11 Feb 2026 21:08:57 -0500 Subject: [PATCH] feat: functional options pattern --- pkg/lambda/scan.go | 15 ++--- pkg/saccharine/scan.go | 27 ++++----- pkg/token/scanner.go | 129 ++++++++++++++++++++++++++--------------- 3 files changed, 104 insertions(+), 67 deletions(-) diff --git a/pkg/lambda/scan.go b/pkg/lambda/scan.go index b6f46a3..dc3c39f 100644 --- a/pkg/lambda/scan.go +++ b/pkg/lambda/scan.go @@ -3,13 +3,14 @@ package lambda import "git.maximhutz.com/max/lambda/pkg/token" // scanner is the declarative lexer for the lambda calculus. -var scanner = token.NewScanner[tokenType](). - On(`\(`, tokenOpenParen, 0). - On(`\)`, tokenCloseParen, 0). - On(`\\`, tokenSlash, 0). - On(`\.`, tokenDot, 0). - On(`[a-zA-Z0-9_]+`, tokenAtom, 0). - Skip(`\s+`, 0) +var scanner = token.NewScanner( + token.On(`\(`, tokenOpenParen, 0), + token.On(`\)`, tokenCloseParen, 0), + token.On(`\\`, tokenSlash, 0), + token.On(`\.`, tokenDot, 0), + token.On(`[a-zA-Z0-9_]+`, tokenAtom, 0), + token.Skip[tokenType](`\s+`, 0), +) // scan tokenizes an input string into lambda calculus tokens. func scan(input string) ([]lambdaToken, error) { diff --git a/pkg/saccharine/scan.go b/pkg/saccharine/scan.go index 7323eb9..48477ea 100644 --- a/pkg/saccharine/scan.go +++ b/pkg/saccharine/scan.go @@ -3,19 +3,20 @@ package saccharine import "git.maximhutz.com/max/lambda/pkg/token" // scanner is the declarative lexer for the Saccharine language. -var scanner = token.NewScanner[TokenType](). - On(`:=`, TokenAssign, 1). - On(`\(`, TokenOpenParen, 0). - On(`\)`, TokenCloseParen, 0). - On(`\{`, TokenOpenBrace, 0). - On(`\}`, TokenCloseBrace, 0). - On(`;`, TokenHardBreak, 0). - On(`\n`, TokenSoftBreak, 0). - On(`\\`, TokenSlash, 0). - On(`\.`, TokenDot, 0). - On(`[a-zA-Z0-9_]+`, TokenAtom, 0). - Skip(`#[^\n]*`, 0). - Skip(`[^\S\n]+`, 0) +var scanner = token.NewScanner( + token.On(`:=`, TokenAssign, 1), + token.On(`\(`, TokenOpenParen, 0), + token.On(`\)`, TokenCloseParen, 0), + token.On(`\{`, TokenOpenBrace, 0), + token.On(`\}`, TokenCloseBrace, 0), + token.On(`;`, TokenHardBreak, 0), + token.On(`\n`, TokenSoftBreak, 0), + token.On(`\\`, TokenSlash, 0), + token.On(`\.`, TokenDot, 0), + token.On(`[a-zA-Z0-9_]+`, TokenAtom, 0), + token.Skip[TokenType](`#[^\n]*`, 0), + token.Skip[TokenType](`[^\S\n]+`, 0), +) // scan tokenizes a string into Saccharine tokens. func scan(input string) ([]Token, error) { diff --git a/pkg/token/scanner.go b/pkg/token/scanner.go index 44da037..a021a14 100644 --- a/pkg/token/scanner.go +++ b/pkg/token/scanner.go @@ -4,83 +4,118 @@ import ( "errors" "fmt" "regexp" + "slices" ) // A rule describes a single lexical pattern for the scanner. type rule[T Type] struct { - pattern *regexp.Regexp - typ T - skip bool + pattern *regexp.Regexp + typ T + precedence int + skip bool } -// A Scanner is a declarative lexer configured by registering regex rules. -// At each position in the input, all rules are tested and the longest match -// wins. -// Ties are broken by registration order (first registered wins). +// compare orders rules by descending precedence. +func (r rule[T]) compare(other rule[T]) int { + return other.precedence - r.precedence +} + +// An Option configures a Scanner during construction. +type Option[T Type] func(rules []rule[T]) []rule[T] + +// On returns an option that registers a token-emitting rule. +// The token's value is the matched text. +// Higher precedence rules are tried first. +func On[T Type](pattern string, typ T, precedence int) Option[T] { + return func(rules []rule[T]) []rule[T] { + return append(rules, rule[T]{ + pattern: compileAnchored(pattern), + typ: typ, + precedence: precedence, + }) + } +} + +// Skip returns an option that registers a non-emitting rule. +// This is used for whitespace and comments. +// Higher precedence rules are tried first. +func Skip[T Type](pattern string, precedence int) Option[T] { + return func(rules []rule[T]) []rule[T] { + return append(rules, rule[T]{ + pattern: compileAnchored(pattern), + precedence: precedence, + skip: true, + }) + } +} + +// A Scanner is a declarative lexer built from a set of regex rules. +// Rules are sorted by precedence (highest first), with registration order as +// tiebreaker. +// At each position, the first matching rule wins. type Scanner[T Type] struct { rules []rule[T] } -// NewScanner creates a new Scanner with no rules. -func NewScanner[T Type]() *Scanner[T] { - return &Scanner[T]{} +// NewScanner creates a Scanner by applying the given options and sorting the +// resulting rules by precedence. +func NewScanner[T Type](opts ...Option[T]) *Scanner[T] { + var rules []rule[T] + for _, opt := range opts { + rules = opt(rules) + } + + slices.SortStableFunc(rules, rule[T].compare) + + return &Scanner[T]{rules: rules} } -// On registers a rule that emits a token of the given type when the pattern -// matches. -// The token's value is the matched text. -func (s *Scanner[T]) On(pattern string, typ T) *Scanner[T] { - s.rules = append(s.rules, rule[T]{ - pattern: compileAnchored(pattern), - typ: typ, - }) - return s -} +// scanOne tries each rule at the current position and returns the first match. +// Returns the token (or nil if skipped) and the number of bytes consumed. +// Returns 0 if no rule matched. +func (s *Scanner[T]) scanOne(input string, pos int) (*Token[T], int) { + for _, r := range s.rules { + loc := r.pattern.FindStringIndex(input[pos:]) + if loc == nil || loc[1] == 0 { + continue + } -// Skip registers a rule that consumes matching text without emitting a token. -// This is used for whitespace and comments. -func (s *Scanner[T]) Skip(pattern string) *Scanner[T] { - s.rules = append(s.rules, rule[T]{ - pattern: compileAnchored(pattern), - skip: true, - }) - return s + if r.skip { + return nil, loc[1] + } + + return &Token[T]{ + Type: r.typ, + Value: input[pos : pos+loc[1]], + Column: pos, + }, loc[1] + } + + return nil, 0 } // Scan tokenizes the input string using the registered rules. -// At each position, all rules are tested and the longest match wins. +// At each position, rules are tried in precedence order and the first match +// wins. // If no rule matches, an error is recorded and the scanner advances one byte. func (s *Scanner[T]) Scan(input string) ([]Token[T], error) { tokens := []Token[T]{} errorList := []error{} for pos := 0; pos < len(input); { - bestLen := 0 - bestRule := -1 + tok, n := s.scanOne(input, pos) - for idx, r := range s.rules { - loc := r.pattern.FindStringIndex(input[pos:]) - if loc != nil && loc[1] > bestLen { - bestLen = loc[1] - bestRule = idx - } - } - - if bestRule == -1 || bestLen == 0 { + if n == 0 { errorList = append(errorList, fmt.Errorf("unknown character '%v'", string(input[pos]))) pos++ continue } - if r := s.rules[bestRule]; !r.skip { - tokens = append(tokens, Token[T]{ - Type: r.typ, - Value: input[pos : pos+bestLen], - Column: pos, - }) + if tok != nil { + tokens = append(tokens, *tok) } - pos += bestLen + pos += n } return tokens, errors.Join(errorList...)