## Description

The saccharine language previously lacked comment support, preventing proper code documentation. This PR implements '#' comment syntax similar to Python. Comments can appear on their own line or at the end of a line, with all content after '#' ignored until the next newline or EOF. The tokenizer now detects '#' and skips characters appropriately without creating tokens.

### Decisions

Comments are silently consumed during tokenization rather than being preserved as tokens, keeping the token stream clean for the parser. The implementation preserves newlines after comments by using the iterator's Back() method, allowing them to be processed as soft breaks.

## Benefits

Developers can now document their saccharine code with inline and full-line comments. The implementation is minimal and efficient, adding no overhead to the token stream. Tests verify that comments work correctly in various positions without breaking code execution.

## Checklist

- [x] Code follows conventional commit format.
- [x] Branch follows naming convention (`<type>/<description>`). Always use underscores.
- [x] Tests pass (if applicable).
- [x] Documentation updated (if applicable).

Closes #24
Reviewed-on: #25
Co-authored-by: M.V. Hutz <git@maximhutz.me>
Co-committed-by: M.V. Hutz <git@maximhutz.me>
131 lines
2.8 KiB
Go
131 lines
2.8 KiB
Go
package token
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"unicode"
|
|
|
|
"git.maximhutz.com/max/lambda/pkg/iterator"
|
|
"git.maximhutz.com/max/lambda/pkg/trace"
|
|
)
|
|
|
|
// isVariable reports whether r may appear in a variable name, which is the
// case when r is a Unicode letter or a Unicode number.
func isVariable(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsNumber(r)
}
|
|
|
|
func parseRune(i *iterator.Iterator[rune], expected func(rune) bool) (rune, error) {
|
|
i2 := i.Copy()
|
|
|
|
if r, err := i2.Next(); err != nil {
|
|
return r, err
|
|
} else if !expected(r) {
|
|
return r, fmt.Errorf("got unexpected rune %v'", r)
|
|
} else {
|
|
i.Sync(i2)
|
|
return r, nil
|
|
}
|
|
}
|
|
|
|
func parseCharacter(i *iterator.Iterator[rune], expected rune) (rune, error) {
|
|
i2 := i.Copy()
|
|
|
|
if r, err := i2.Next(); err != nil {
|
|
return r, err
|
|
} else if r != expected {
|
|
return r, fmt.Errorf("got unexpected rune %v'", r)
|
|
} else {
|
|
i.Sync(i2)
|
|
return r, nil
|
|
}
|
|
}
|
|
|
|
// Pulls the next token from an iterator over runes. If it cannot, it will
|
|
// return nil. If an error occurs, it will return that.
|
|
func getToken(i *iterator.Iterator[rune]) (*Token, error) {
|
|
index := i.Index()
|
|
|
|
if i.Done() {
|
|
return nil, nil
|
|
}
|
|
|
|
letter, err := i.Next()
|
|
if err != nil {
|
|
return nil, trace.Wrap(err, "cannot produce next token")
|
|
}
|
|
|
|
switch {
|
|
case letter == '(':
|
|
return NewOpenParen(index), nil
|
|
case letter == ')':
|
|
return NewCloseParen(index), nil
|
|
case letter == '.':
|
|
return NewDot(index), nil
|
|
case letter == '\\':
|
|
return NewSlash(index), nil
|
|
case letter == '\n':
|
|
return NewSoftBreak(index), nil
|
|
case letter == '{':
|
|
return NewOpenBrace(index), nil
|
|
case letter == '}':
|
|
return NewCloseBrace(index), nil
|
|
case letter == ':':
|
|
if _, err := parseCharacter(i, '='); err != nil {
|
|
return nil, err
|
|
} else {
|
|
return NewAssign(index), nil
|
|
}
|
|
case letter == ';':
|
|
return NewHardBreak(index), nil
|
|
case letter == '#':
|
|
// Skip everything until the next newline or EOF.
|
|
for !i.Done() {
|
|
r, err := i.Next()
|
|
if err != nil {
|
|
return nil, trace.Wrap(err, "error while parsing comment")
|
|
}
|
|
|
|
if r == '\n' {
|
|
// Put the newline back so it can be processed as a soft break.
|
|
i.Back()
|
|
break
|
|
}
|
|
}
|
|
return nil, nil
|
|
case unicode.IsSpace(letter):
|
|
return nil, nil
|
|
case isVariable(letter):
|
|
atom := []rune{letter}
|
|
|
|
for {
|
|
if r, err := parseRune(i, isVariable); err != nil {
|
|
break
|
|
} else {
|
|
atom = append(atom, r)
|
|
}
|
|
}
|
|
|
|
return NewAtom(string(atom), index), nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unknown character '%v'", string(letter))
|
|
}
|
|
|
|
// Parse a string into tokens.
|
|
func Parse(input string) ([]Token, error) {
|
|
i := iterator.Of([]rune(input))
|
|
tokens := []Token{}
|
|
errorList := []error{}
|
|
|
|
for !i.Done() {
|
|
token, err := getToken(i)
|
|
if err != nil {
|
|
errorList = append(errorList, err)
|
|
} else if token != nil {
|
|
tokens = append(tokens, *token)
|
|
}
|
|
}
|
|
|
|
return tokens, errors.Join(errorList...)
|
|
}
|