From 3b7cf21eb7ba59c4953b0a3a8b35dac23b21846b Mon Sep 17 00:00:00 2001 From: "M.V. Hutz" Date: Wed, 11 Feb 2026 20:54:05 -0500 Subject: [PATCH] feat: undo --- pkg/lambda/scan.go | 12 ++++++------ pkg/saccharine/scan.go | 24 ++++++++++++------------ pkg/token/scanner.go | 32 +++++++++----------------------- 3 files changed, 27 insertions(+), 41 deletions(-) diff --git a/pkg/lambda/scan.go b/pkg/lambda/scan.go index b6f46a3..b1f08ad 100644 --- a/pkg/lambda/scan.go +++ b/pkg/lambda/scan.go @@ -4,12 +4,12 @@ import "git.maximhutz.com/max/lambda/pkg/token" // scanner is the declarative lexer for the lambda calculus. var scanner = token.NewScanner[tokenType](). - On(`\(`, tokenOpenParen, 0). - On(`\)`, tokenCloseParen, 0). - On(`\\`, tokenSlash, 0). - On(`\.`, tokenDot, 0). - On(`[a-zA-Z0-9_]+`, tokenAtom, 0). - Skip(`\s+`, 0) + On(`\(`, tokenOpenParen). + On(`\)`, tokenCloseParen). + On(`\\`, tokenSlash). + On(`\.`, tokenDot). + On(`[a-zA-Z0-9_]+`, tokenAtom). + Skip(`\s+`) // scan tokenizes an input string into lambda calculus tokens. func scan(input string) ([]lambdaToken, error) { diff --git a/pkg/saccharine/scan.go b/pkg/saccharine/scan.go index 7323eb9..f784fef 100644 --- a/pkg/saccharine/scan.go +++ b/pkg/saccharine/scan.go @@ -4,18 +4,18 @@ import "git.maximhutz.com/max/lambda/pkg/token" // scanner is the declarative lexer for the Saccharine language. var scanner = token.NewScanner[TokenType](). - On(`:=`, TokenAssign, 1). - On(`\(`, TokenOpenParen, 0). - On(`\)`, TokenCloseParen, 0). - On(`\{`, TokenOpenBrace, 0). - On(`\}`, TokenCloseBrace, 0). - On(`;`, TokenHardBreak, 0). - On(`\n`, TokenSoftBreak, 0). - On(`\\`, TokenSlash, 0). - On(`\.`, TokenDot, 0). - On(`[a-zA-Z0-9_]+`, TokenAtom, 0). - Skip(`#[^\n]*`, 0). - Skip(`[^\S\n]+`, 0) + On(`:=`, TokenAssign). + On(`\(`, TokenOpenParen). + On(`\)`, TokenCloseParen). + On(`\{`, TokenOpenBrace). + On(`\}`, TokenCloseBrace). + On(`;`, TokenHardBreak). + On(`\n`, TokenSoftBreak). + On(`\\`, TokenSlash). + On(`\.`, TokenDot). + On(`[a-zA-Z0-9_]+`, TokenAtom). + Skip(`#[^\n]*`). + Skip(`[^\S\n]+`) // scan tokenizes a string into Saccharine tokens. func scan(input string) ([]Token, error) {
diff --git a/pkg/token/scanner.go b/pkg/token/scanner.go index e423fc4..44da037 100644 --- a/pkg/token/scanner.go +++ b/pkg/token/scanner.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "regexp" - "unicode/utf8" ) // A rule describes a single lexical pattern for the scanner. @@ -41,10 +40,8 @@ func (s *Scanner[T]) On(pattern string, typ T) *Scanner[T] { // Skip registers a rule that consumes matching text without emitting a token. // This is used for whitespace and comments. func (s *Scanner[T]) Skip(pattern string) *Scanner[T] { - var zero T s.rules = append(s.rules, rule[T]{ pattern: compileAnchored(pattern), - typ: zero, skip: true, }) return s } @@ -52,48 +49,37 @@ // Scan tokenizes the input string using the registered rules. // At each position, all rules are tested and the longest match wins. - // If no rule matches, an error is recorded and the scanner advances one rune. + // If no rule matches, an error is recorded and the scanner advances one byte.
func (s *Scanner[T]) Scan(input string) ([]Token[T], error) { tokens := []Token[T]{} errorList := []error{} - pos := 0 - column := 0 - for pos < len(input) { + for pos := 0; pos < len(input); { bestLen := 0 bestRule := -1 for idx, r := range s.rules { loc := r.pattern.FindStringIndex(input[pos:]) - if loc == nil { - continue - } - if matchLen := loc[1]; matchLen > bestLen { - bestLen = matchLen + if loc != nil && loc[1] > bestLen { + bestLen = loc[1] bestRule = idx } } if bestRule == -1 || bestLen == 0 { - _, size := utf8.DecodeRuneInString(input[pos:]) - errorList = append(errorList, fmt.Errorf("unknown character '%v'", input[pos:pos+size])) - pos += size - column++ + errorList = append(errorList, fmt.Errorf("unknown character '%v'", string(input[pos]))) + pos++ continue } - matched := input[pos : pos+bestLen] - r := s.rules[bestRule] - - if !r.skip { + if r := s.rules[bestRule]; !r.skip { tokens = append(tokens, Token[T]{ Type: r.typ, - Value: matched, - Column: column, + Value: input[pos : pos+bestLen], + Column: pos, }) } - column += utf8.RuneCountInString(matched) pos += bestLen }