From 14fc4b30da00e697a27ad339ba2f21672cb2d58c Mon Sep 17 00:00:00 2001 From: Max Date: Sat, 27 Dec 2025 20:46:10 -0500 Subject: [PATCH] feat: cleaner parsing functions --- pkg/iterator/iterator.go | 17 ++++ pkg/saccharine/parser.go | 155 ++++++++++++++++++---------------- pkg/saccharine/token/token.go | 20 +++-- pkg/saccharine/tokenizer.go | 10 +-- pkg/trace/trace.go | 4 +- 5 files changed, 121 insertions(+), 85 deletions(-) diff --git a/pkg/iterator/iterator.go b/pkg/iterator/iterator.go index 56aa5bd..2db5cd4 100644 --- a/pkg/iterator/iterator.go +++ b/pkg/iterator/iterator.go @@ -48,6 +48,12 @@ func (i Iterator[T]) MustGet() T { return i.items[i.index] } +func (i *Iterator[T]) Forward() { + if !i.Done() { + i.index++ + } +} + // Create a new iterator, over a set of items. func (i *Iterator[T]) Next() (T, error) { item, err := i.Get() @@ -67,3 +73,14 @@ func (i *Iterator[T]) Back() { func (i Iterator[T]) Done() bool { return i.index == len(i.items) } + +func Do[T any, U any](i *Iterator[T], fn func(i *Iterator[T]) (U, error)) (U, error) { + i2 := i.Copy() + + out, err := fn(i2) + if err == nil { + i.Sync(i2) + } + + return out, err +} diff --git a/pkg/saccharine/parser.go b/pkg/saccharine/parser.go index b2cda63..ad27ed7 100644 --- a/pkg/saccharine/parser.go +++ b/pkg/saccharine/parser.go @@ -11,101 +11,114 @@ import ( type TokenIterator = iterator.Iterator[token.Token] -func parseToken(i *TokenIterator, expected token.Type) (*token.Token, error) { - i2 := i.Copy() +func parseRawToken(i *TokenIterator, expected token.Type) (*token.Token, error) { + return iterator.Do(i, func(i *TokenIterator) (*token.Token, error) { + if tok, err := i.Next(); err != nil { + return nil, err + } else if tok.Type != expected { + return nil, fmt.Errorf("expected token %v, got %v'", token.Name(expected), tok.Value) + } else { + return &tok, nil + } + }) +} - if tok, err := i2.Next(); err != nil { - return nil, err - } else if tok.Type != expected { - return nil, fmt.Errorf("expected token %v, got %v'", token.Name(expected), tok.Value) - } else { - i.Sync(i2) - return &tok, nil +func passSoftBreaks(i *TokenIterator) { + for { + if _, err := parseRawToken(i, token.SoftBreak); err != nil { + return + } } } +func parseToken(i *TokenIterator, expected token.Type, ignoreSoftBreaks bool) (*token.Token, error) { + return iterator.Do(i, func(i *TokenIterator) (*token.Token, error) { + if ignoreSoftBreaks { + passSoftBreaks(i) + } + + return parseRawToken(i, expected) + }) +} + func parseExpression(i *TokenIterator) (ast.Expression, error) { - var err error - var exp ast.Expression - peek := i.MustGet() + return iterator.Do(i, func(i *TokenIterator) (ast.Expression, error) { + passSoftBreaks(i) + peek := i.MustGet() - switch peek.Type { - case token.OpenParen: - exp, err = parseApplication(i) - case token.Slash: - exp, err = parseAbstraction(i) - case token.Atom: - exp, err = parseAtom(i) - default: - return nil, fmt.Errorf("expected expression, got '%v' (col %d)", peek.Value, peek.Column) - } - - return exp, err + switch peek.Type { + case token.OpenParen: + return parseApplication(i) + case token.Slash: + return parseAbstraction(i) + case token.Atom: + return parseAtom(i) + default: + return nil, fmt.Errorf("expected expression, got '%v' (col %d)", peek.Value, peek.Column) + } + }) } func parseParameters(i *TokenIterator) ([]string, error) { - i2 := i.Copy() - variables := []string{} + return iterator.Do(i, func(i *TokenIterator) ([]string, error) { + variables := []string{} - for { - if tok, err := parseToken(i2, token.Atom); err != nil { - break - } else { - variables = append(variables, tok.Value) + for { + if tok, err := parseToken(i, token.Atom, true); err != nil { + return variables, nil + } else { + variables = append(variables, tok.Value) + } } - } - - i.Sync(i2) - return variables, nil + }) } func parseAbstraction(i *TokenIterator) (*ast.Abstraction, error) { - i2 := i.Copy() - - if _, err := parseToken(i2, token.Slash); err != nil { - return nil, trace.WrapError(fmt.Errorf("no function slash (col %d)", i2.MustGet().Column), err) - } else if parameters, err := parseParameters(i2); err != nil { - return nil, err - } else if _, err = parseToken(i2, token.Dot); err != nil { - return nil, trace.WrapError(fmt.Errorf("no function dot (col %d)", i2.MustGet().Column), err) - } else if body, err := parseExpression(i2); err != nil { - return nil, err - } else { - i.Sync(i2) - return ast.NewAbstraction(parameters, body), nil - } + return iterator.Do(i, func(i *TokenIterator) (*ast.Abstraction, error) { + if _, err := parseToken(i, token.Slash, true); err != nil { + return nil, trace.Wrap(err, "no function slash (col %d)", i.MustGet().Column) + } else if parameters, err := parseParameters(i); err != nil { + return nil, err + } else if _, err = parseToken(i, token.Dot, true); err != nil { + return nil, trace.Wrap(err, "no function dot (col %d)", i.MustGet().Column) + } else if body, err := parseExpression(i); err != nil { + return nil, err + } else { + return ast.NewAbstraction(parameters, body), nil + } + }) } func parseApplication(i *TokenIterator) (*ast.Application, error) { - i2 := i.Copy() - expressions := []ast.Expression{} + return iterator.Do(i, func(i *TokenIterator) (*ast.Application, error) { + expressions := []ast.Expression{} - if _, err := parseToken(i2, token.OpenParen); err != nil { - return nil, trace.WrapError(fmt.Errorf("no openning brackets (col %d)", i2.MustGet().Column), err) - } - - for { - if exp, err := parseExpression(i2); err != nil { - if len(expressions) == 0 { - return nil, trace.WrapError(fmt.Errorf("application has no arguments"), err) - } - break - } else { - expressions = append(expressions, exp) + if _, err := parseToken(i, token.OpenParen, true); err != nil { + return nil, trace.Wrap(err, "no openning brackets (col %d)", i.MustGet().Column) } - } - if _, err := parseToken(i2, token.CloseParen); err != nil { - return nil, trace.WrapError(fmt.Errorf("no closing brackets (col %d)", i2.MustGet().Column), err) - } + for { + if exp, err := parseExpression(i); err != nil { + if len(expressions) == 0 { + return nil, trace.Wrap(err, "application has no arguments") + } + break + } else { + expressions = append(expressions, exp) + } + } - i.Sync(i2) - return ast.NewApplication(expressions[0], expressions[1:]), nil + if _, err := parseToken(i, token.CloseParen, true); err != nil { + return nil, trace.Wrap(err, "no closing brackets (col %d)", i.MustGet().Column) + } + + return ast.NewApplication(expressions[0], expressions[1:]), nil + }) } func parseAtom(i *TokenIterator) (*ast.Atom, error) { - if tok, err := parseToken(i, token.Atom); err != nil { - return nil, trace.WrapError(fmt.Errorf("no variable (col %d)", i.Index()), err) + if tok, err := parseToken(i, token.Atom, true); err != nil { + return nil, trace.Wrap(err, "no variable (col %d)", i.Index()) } else { return ast.NewAtom(tok.Value), nil } diff --git a/pkg/saccharine/token/token.go b/pkg/saccharine/token/token.go index a8800d6..57771a5 100644 --- a/pkg/saccharine/token/token.go +++ b/pkg/saccharine/token/token.go @@ -1,5 +1,7 @@ package token +import "fmt" + // All tokens in the pseudo-lambda language. type Type int @@ -8,12 +10,12 @@ const ( CloseParen // Denotes the ')' token. OpenBrace // Denotes the '{' token. CloseBrace // Denotes the '}' token. - End // Denotes the ';' token. + HardBreak // Denotes the ';' token. Assign // Denotes the ':=' token. Atom // Denotes an alpha-numeric variable. Slash // Denotes the '/' token. Dot // Denotes the '.' token. - Newline // Denotes a new-line. + SoftBreak // Denotes a new-line. ) // A representation of a token in source code. @@ -43,8 +45,8 @@ func NewDot(column int) *Token { return &Token{Type: Dot, Column: column, Value: "."} } -func NewEnd(column int) *Token { - return &Token{Type: End, Column: column, Value: ";"} +func NewHardBreak(column int) *Token { + return &Token{Type: HardBreak, Column: column, Value: ";"} } func NewAssign(column int) *Token { @@ -59,8 +61,8 @@ func NewAtom(name string, column int) *Token { return &Token{Type: Atom, Column: column, Value: name} } -func NewNewline(column int) *Token { - return &Token{Type: Newline, Column: column, Value: "\\n"} +func NewSoftBreak(column int) *Token { + return &Token{Type: SoftBreak, Column: column, Value: "\\n"} } func Name(typ Type) string { @@ -75,10 +77,12 @@ func Name(typ Type) string { return "." case Atom: return "ATOM" - case Newline: + case SoftBreak: return "\\n" + case HardBreak: + return ";" default: - return "?" + panic(fmt.Errorf("unknown token type %v", typ)) } } diff --git a/pkg/saccharine/tokenizer.go b/pkg/saccharine/tokenizer.go index 27dda70..c0de4de 100644 --- a/pkg/saccharine/tokenizer.go +++ b/pkg/saccharine/tokenizer.go @@ -52,7 +52,7 @@ func getToken(i *iterator.Iterator[rune]) (*token.Token, error) { letter, err := i.Next() if err != nil { - return nil, trace.WrapError(fmt.Errorf("cannot produce next token"), err) + return nil, trace.Wrap(err, "cannot produce next token") } switch { @@ -65,11 +65,11 @@ func getToken(i *iterator.Iterator[rune]) (*token.Token, error) { case letter == '\\': return token.NewSlash(index), nil case letter == '\n': - return token.NewNewline(index), nil + return token.NewSoftBreak(index), nil case letter == '{': - return token.NewNewline(index), nil + return token.NewOpenBrace(index), nil case letter == '}': - return token.NewNewline(index), nil + return token.NewCloseBrace(index), nil case letter == ':': if _, err := parseCharacter(i, '='); err != nil { return nil, err @@ -77,7 +77,7 @@ func getToken(i *iterator.Iterator[rune]) (*token.Token, error) { return token.NewAssign(index), nil } case letter == ';': - return token.NewEnd(index), nil + return token.NewHardBreak(index), nil case unicode.IsSpace(letter): return nil, nil case isVariable(letter): diff --git a/pkg/trace/trace.go b/pkg/trace/trace.go index a2d9af6..87a0337 100644 --- a/pkg/trace/trace.go +++ b/pkg/trace/trace.go @@ -2,6 +2,7 @@ package trace import ( "errors" + "fmt" "strings" ) @@ -17,7 +18,8 @@ func Indent(s string, size int) string { return indented } -func WrapError(parent error, child error) error { +func Wrap(child error, format string, a ...any) error { + parent := fmt.Errorf(format, a...) childErrString := Indent(child.Error(), 4) return errors.New(parent.Error() + "\n" + childErrString) }