| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- package interpolate
- import (
- "fmt"
- "strconv"
- "strings"
- "unicode"
- "unicode/utf8"
- )
- // This is a recursive descent parser for our grammar. Because it can contain nested expressions
- // like ${LLAMAS:-${ROCK:-true}} we can't use regular expressions. The simplest possible alternative
- // is a recursive parser like this. It parses a chunk and then calls a function to parse that
- // further and so on and so forth. It results in a tree of objects that represent the things we've
- // parsed (an AST). This means that the logic for how expansions work lives in those objects, and
- // the logic for how we go from plain text to parsed objects lives here.
- //
- // To keep things simple, we do our "lexing" or "scanning" with just a few functions at the end of
- // the file rather than with a dedicated lexer that emits tokens. This matches the simplicity of the
- // format we are parsing relatively well.
- //
- // Below is an EBNF grammar for the language. The parser was built by basically turning this into
- // functions and structs with the same names, reading the string bite by bite (peekRune and nextRune).
- /*
- EscapedBackslash = "\\"
- EscapedDollar    = ( "\$" | "$$" )
- Identifier       = letter { letter | digit | "_" }
- Expansion        = "$" ( Identifier | Brace )
- Brace            = "{" Identifier [ Operation ] "}"
- Text             = { EscapedBackslash | EscapedDollar | all characters except "$" }
- Expression       = { Text | Expansion }
- EmptyValue       = ":-" { Expression }
- UnsetValue       = "-" { Expression }
- Substring        = ":" number [ ":" number ]
- Required         = "?" { Expression }
- Operation        = EmptyValue | UnsetValue | Substring | Required
- */
// eof is the sentinel rune that peekRune and nextRune return once the read
// position has reached the end of the input.
const eof = -1
// Parser walks over a string and builds a tree of structs describing the
// plain text and the Expansions found in it.
type Parser struct {
	// input is the complete string being scanned.
	input string
	// pos is the byte offset into input of the next rune to read.
	pos int
}

// NewParser returns a Parser positioned at the start of str.
func NewParser(str string) *Parser {
	// pos is left at its zero value, i.e. the beginning of the input.
	return &Parser{input: str}
}
- // Parse expansions out of the internal text and return them as a tree of Expressions
- func (p *Parser) Parse() (Expression, error) {
- return p.parseExpression()
- }
- func (p *Parser) parseExpression(stop ...rune) (Expression, error) {
- var expr Expression
- var stopStr = string(stop)
- for {
- c := p.peekRune()
- if c == eof || strings.ContainsRune(stopStr, c) {
- break
- }
- // check for our escaped characters first, as we assume nothing subsequently is escaped
- if strings.HasPrefix(p.input[p.pos:], `\\`) {
- p.pos += 2
- expr = append(expr, ExpressionItem{Text: `\\`})
- continue
- } else if strings.HasPrefix(p.input[p.pos:], `\$`) || strings.HasPrefix(p.input[p.pos:], `$$`) {
- p.pos += 2
- expr = append(expr, ExpressionItem{Text: `$`})
- continue
- }
- // Ignore bash shell expansions
- if strings.HasPrefix(p.input[p.pos:], `$(`) {
- p.pos += 2
- expr = append(expr, ExpressionItem{Text: `$(`})
- continue
- }
- // If we run into a dollar sign and it's not the last char, it's an expansion
- if c == '$' && p.pos < (len(p.input)-1) {
- expansion, err := p.parseExpansion()
- if err != nil {
- return nil, err
- }
- expr = append(expr, ExpressionItem{Expansion: expansion})
- continue
- }
- // nibble a character, otherwise if it's a \ or a $ we can loop
- c = p.nextRune()
- // Scan as much as we can into text
- text := p.scanUntil(func(r rune) bool {
- return (r == '$' || r == '\\' || strings.ContainsRune(stopStr, r))
- })
- expr = append(expr, ExpressionItem{Text: string(c) + text})
- }
- return expr, nil
- }
- func (p *Parser) parseExpansion() (Expansion, error) {
- if c := p.nextRune(); c != '$' {
- return nil, fmt.Errorf("Expected expansion to start with $, got %c", c)
- }
- // if we have an open brace, this is a brace expansion
- if c := p.peekRune(); c == '{' {
- return p.parseBraceExpansion()
- }
- identifier, err := p.scanIdentifier()
- if err != nil {
- return nil, err
- }
- return VariableExpansion{Identifier: identifier}, nil
- }
- func (p *Parser) parseBraceExpansion() (Expansion, error) {
- if c := p.nextRune(); c != '{' {
- return nil, fmt.Errorf("Expected brace expansion to start with {, got %c", c)
- }
- identifier, err := p.scanIdentifier()
- if err != nil {
- return nil, err
- }
- if c := p.peekRune(); c == '}' {
- _ = p.nextRune()
- return VariableExpansion{Identifier: identifier}, nil
- }
- var operator string
- var exp Expansion
- // Parse an operator, some trickery is needed to handle : vs :-
- if op1 := p.nextRune(); op1 == ':' {
- if op2 := p.peekRune(); op2 == '-' {
- _ = p.nextRune()
- operator = ":-"
- } else {
- operator = ":"
- }
- } else if op1 == '?' || op1 == '-' {
- operator = string(op1)
- } else {
- return nil, fmt.Errorf("Expected an operator, got %c", op1)
- }
- switch operator {
- case `:-`:
- exp, err = p.parseEmptyValueExpansion(identifier)
- if err != nil {
- return nil, err
- }
- case `-`:
- exp, err = p.parseUnsetValueExpansion(identifier)
- if err != nil {
- return nil, err
- }
- case `:`:
- exp, err = p.parseSubstringExpansion(identifier)
- if err != nil {
- return nil, err
- }
- case `?`:
- exp, err = p.parseRequiredExpansion(identifier)
- if err != nil {
- return nil, err
- }
- }
- if c := p.nextRune(); c != '}' {
- return nil, fmt.Errorf("Expected brace expansion to end with }, got %c", c)
- }
- return exp, nil
- }
- func (p *Parser) parseEmptyValueExpansion(identifier string) (Expansion, error) {
- // parse an expression (text and expansions) up until the end of the brace
- expr, err := p.parseExpression('}')
- if err != nil {
- return nil, err
- }
- return EmptyValueExpansion{Identifier: identifier, Content: expr}, nil
- }
- func (p *Parser) parseUnsetValueExpansion(identifier string) (Expansion, error) {
- expr, err := p.parseExpression('}')
- if err != nil {
- return nil, err
- }
- return UnsetValueExpansion{Identifier: identifier, Content: expr}, nil
- }
- func (p *Parser) parseSubstringExpansion(identifier string) (Expansion, error) {
- offset := p.scanUntil(func(r rune) bool {
- return r == ':' || r == '}'
- })
- offsetInt, err := strconv.Atoi(strings.TrimSpace(offset))
- if err != nil {
- return nil, fmt.Errorf("Unable to parse offset: %v", err)
- }
- if c := p.peekRune(); c == '}' {
- return SubstringExpansion{Identifier: identifier, Offset: offsetInt}, nil
- }
- _ = p.nextRune()
- length := p.scanUntil(func(r rune) bool {
- return r == '}'
- })
- lengthInt, err := strconv.Atoi(strings.TrimSpace(length))
- if err != nil {
- return nil, fmt.Errorf("Unable to parse length: %v", err)
- }
- return SubstringExpansion{Identifier: identifier, Offset: offsetInt, Length: lengthInt, HasLength: true}, nil
- }
- func (p *Parser) parseRequiredExpansion(identifier string) (Expansion, error) {
- expr, err := p.parseExpression('}')
- if err != nil {
- return nil, err
- }
- return RequiredExpansion{Identifier: identifier, Message: expr}, nil
- }
- func (p *Parser) scanUntil(f func(rune) bool) string {
- start := p.pos
- for int(p.pos) < len(p.input) {
- c, size := utf8.DecodeRuneInString(p.input[p.pos:])
- if c == utf8.RuneError || f(c) {
- break
- }
- p.pos += size
- }
- return p.input[start:p.pos]
- }
- func (p *Parser) scanIdentifier() (string, error) {
- if c := p.peekRune(); !unicode.IsLetter(c) {
- return "", fmt.Errorf("Expected identifier to start with a letter, got %c", c)
- }
- var notIdentifierChar = func(r rune) bool {
- return (!unicode.IsLetter(r) && !unicode.IsNumber(r) && r != '_')
- }
- return p.scanUntil(notIdentifierChar), nil
- }
- func (p *Parser) nextRune() rune {
- if int(p.pos) >= len(p.input) {
- return eof
- }
- c, size := utf8.DecodeRuneInString(p.input[p.pos:])
- p.pos += size
- return c
- }
- func (p *Parser) peekRune() rune {
- if int(p.pos) >= len(p.input) {
- return eof
- }
- c, _ := utf8.DecodeRuneInString(p.input[p.pos:])
- return c
- }
|