Files
lambda/pkg/saccharine/token/parse.go
M.V. Hutz 53f4081f6f fix: correct loop condition in comment parsing
The loop was checking 'for i.Done()' instead of 'for !i.Done()',
which prevented the comment content from being consumed.
This caused the tokenizer to treat comment text as code.
2026-01-12 20:59:34 -05:00

131 lines
2.8 KiB
Go

package token
import (
"errors"
"fmt"
"unicode"
"git.maximhutz.com/max/lambda/pkg/iterator"
"git.maximhutz.com/max/lambda/pkg/trace"
)
// isVariable reports whether r may appear in a variable name, which is the
// case when r is a Unicode letter or a Unicode number.
func isVariable(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsNumber(r)
}
// parseRune consumes the next rune from i if it satisfies expected.
//
// The rune is read from a copy of the iterator, and i is only updated (via
// Sync) once the rune has been accepted; on every error path i is not synced.
// An error is returned when the copy cannot yield a rune or when expected
// rejects the rune. The rune returned alongside an error should not be relied
// on.
func parseRune(i *iterator.Iterator[rune], expected func(rune) bool) (rune, error) {
	i2 := i.Copy()

	r, err := i2.Next()
	if err != nil {
		return r, err
	}
	if !expected(r) {
		// %q prints the rune as a quoted character instead of its numeric
		// code point.
		return r, fmt.Errorf("got unexpected rune %q", r)
	}

	i.Sync(i2)
	return r, nil
}
// parseCharacter consumes the next rune from i if it equals expected.
//
// The rune is read from a copy of the iterator, and i is only updated (via
// Sync) once the rune has matched; on every error path i is not synced. An
// error is returned when the copy cannot yield a rune or when the rune differs
// from expected. The rune returned alongside an error should not be relied on.
func parseCharacter(i *iterator.Iterator[rune], expected rune) (rune, error) {
	i2 := i.Copy()

	r, err := i2.Next()
	if err != nil {
		return r, err
	}
	if r != expected {
		// %q prints runes as quoted characters instead of numeric code points.
		return r, fmt.Errorf("got unexpected rune %q, expected %q", r, expected)
	}

	i.Sync(i2)
	return r, nil
}
// getToken reads a single token from the rune iterator i.
//
// A nil token with a nil error means nothing was produced: the iterator was
// already done, or the runes consumed were a comment or whitespace. A non-nil
// error is returned when the next rune cannot be read, when ':' is not
// followed by '=', or when the rune does not start any known token.
func getToken(i *iterator.Iterator[rune]) (*Token, error) {
	index := i.Index()
	if i.Done() {
		return nil, nil
	}

	letter, err := i.Next()
	if err != nil {
		return nil, trace.Wrap(err, "cannot produce next token")
	}

	// Single-rune tokens, the two-rune assignment, and comments.
	switch letter {
	case '(':
		return NewOpenParen(index), nil
	case ')':
		return NewCloseParen(index), nil
	case '.':
		return NewDot(index), nil
	case '\\':
		return NewSlash(index), nil
	case '\n':
		// Checked before the general whitespace test below, so a newline
		// becomes a soft break instead of being dropped.
		return NewSoftBreak(index), nil
	case '{':
		return NewOpenBrace(index), nil
	case '}':
		return NewCloseBrace(index), nil
	case ':':
		// An assignment is spelled ":="; a ':' followed by anything else is
		// an error.
		if _, assignErr := parseCharacter(i, '='); assignErr != nil {
			return nil, assignErr
		}
		return NewAssign(index), nil
	case ';':
		return NewHardBreak(index), nil
	case '#':
		// A comment runs up to the next newline or the end of the input.
		for !i.Done() {
			next, nextErr := i.Next()
			if nextErr != nil {
				return nil, trace.Wrap(nextErr, "error while parsing comment")
			}
			if next != '\n' {
				continue
			}
			// Step back so the newline is still processed as a soft break.
			i.Back()
			return nil, nil
		}
		return nil, nil
	}

	// Any other whitespace yields no token.
	if unicode.IsSpace(letter) {
		return nil, nil
	}

	if !isVariable(letter) {
		return nil, fmt.Errorf("unknown character '%v'", string(letter))
	}

	// Gather the longest run of variable runes into one atom. parseRune only
	// advances i on success, so the first rejected rune stays available for
	// the next call.
	name := []rune{letter}
	for {
		r, runeErr := parseRune(i, isVariable)
		if runeErr != nil {
			break
		}
		name = append(name, r)
	}
	return NewAtom(string(name), index), nil
}
// Parse tokenizes input. It keeps going after a token fails, so the returned
// slice holds every token that could be read, and the returned error joins all
// failures encountered (nil when there were none).
func Parse(input string) ([]Token, error) {
	runes := iterator.Of([]rune(input))
	result := make([]Token, 0)
	var failures []error

	for !runes.Done() {
		next, err := getToken(runes)
		switch {
		case err != nil:
			failures = append(failures, err)
		case next != nil:
			result = append(result, *next)
		}
	}

	return result, errors.Join(failures...)
}