The loop was checking 'for i.Done()' instead of 'for !i.Done()', which prevented the comment content from being consumed. This caused the tokenizer to treat comment text as code.
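In diff form, the fix is a one-character change to the loop guard in the comment-skipping branch of getToken (shown in full in the file below):

-	for i.Done() {
+	for !i.Done() {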
package token

import (
	"errors"
	"fmt"
	"unicode"

	"git.maximhutz.com/max/lambda/pkg/iterator"
	"git.maximhutz.com/max/lambda/pkg/trace"
)

// isVariable reports whether a rune can appear in a variable name.
func isVariable(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsNumber(r)
}

// parseRune consumes the next rune if it satisfies expected. The lookahead is
// done on a copy of the iterator, which is synced back only on a match, so a
// failed match leaves the caller's iterator untouched.
func parseRune(i *iterator.Iterator[rune], expected func(rune) bool) (rune, error) {
	i2 := i.Copy()

	if r, err := i2.Next(); err != nil {
		return r, err
	} else if !expected(r) {
		return r, fmt.Errorf("got unexpected rune %q", r)
	} else {
		i.Sync(i2)
		return r, nil
	}
}

// parseCharacter consumes the next rune if it equals expected, using the same
// copy-and-sync lookahead as parseRune.
func parseCharacter(i *iterator.Iterator[rune], expected rune) (rune, error) {
	i2 := i.Copy()

	if r, err := i2.Next(); err != nil {
		return r, err
	} else if r != expected {
		return r, fmt.Errorf("got unexpected rune %q", r)
	} else {
		i.Sync(i2)
		return r, nil
	}
}

// getToken pulls the next token from an iterator over runes. It returns a nil
// token when nothing is produced (end of input, whitespace, or a comment), and
// an error if one occurs.
func getToken(i *iterator.Iterator[rune]) (*Token, error) {
	index := i.Index()

	if i.Done() {
		return nil, nil
	}

	letter, err := i.Next()
	if err != nil {
		return nil, trace.Wrap(err, "cannot produce next token")
	}

	switch {
	case letter == '(':
		return NewOpenParen(index), nil
	case letter == ')':
		return NewCloseParen(index), nil
	case letter == '.':
		return NewDot(index), nil
	case letter == '\\':
		return NewSlash(index), nil
	case letter == '\n':
		return NewSoftBreak(index), nil
	case letter == '{':
		return NewOpenBrace(index), nil
	case letter == '}':
		return NewCloseBrace(index), nil
	case letter == ':':
		// ':' is only valid as part of the assignment operator ":=".
		if _, err := parseCharacter(i, '='); err != nil {
			return nil, err
		} else {
			return NewAssign(index), nil
		}
	case letter == ';':
		return NewHardBreak(index), nil
	case letter == '#':
		// Skip everything until the next newline or EOF.
		for !i.Done() {
			r, err := i.Next()
			if err != nil {
				return nil, trace.Wrap(err, "error while parsing comment")
			}

			if r == '\n' {
				// Put the newline back so it can be processed as a soft break.
				i.Back()
				break
			}
		}
		return nil, nil
	case unicode.IsSpace(letter):
		return nil, nil
	case isVariable(letter):
		atom := []rune{letter}

		// Greedily consume runes for as long as they can extend the variable.
		for {
			if r, err := parseRune(i, isVariable); err != nil {
				break
			} else {
				atom = append(atom, r)
			}
		}

		return NewAtom(string(atom), index), nil
	}

	return nil, fmt.Errorf("unknown character '%v'", string(letter))
}

// Parse parses a string into tokens. Errors are collected as it goes rather
// than stopping at the first one, so the joined error reports every invalid
// character in the input.
func Parse(input string) ([]Token, error) {
	i := iterator.Of([]rune(input))
	tokens := []Token{}
	errorList := []error{}

	for !i.Done() {
		token, err := getToken(i)
		if err != nil {
			errorList = append(errorList, err)
		} else if token != nil {
			tokens = append(tokens, *token)
		}
	}

	return tokens, errors.Join(errorList...)
}
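For reference, a minimal usage sketch. The import path pkg/token and the way tokens print are assumptions (neither is shown in this file); only Parse and the tokenization behaviour above are taken from the code.

package main

import (
	"fmt"
	"log"

	// Assumed import path, mirroring pkg/iterator and pkg/trace above.
	"git.maximhutz.com/max/lambda/pkg/token"
)

func main() {
	// "# identity" is skipped by the '#' branch; the trailing newline is
	// put back and comes out as a soft break token.
	src := "id := \\x. x # identity\n"

	tokens, err := token.Parse(src)
	if err != nil {
		log.Fatal(err)
	}

	for _, t := range tokens {
		fmt.Printf("%v\n", t) // relies on Token's default %v formatting
	}
}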