parser.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. package interpolate
  2. import (
  3. "fmt"
  4. "strconv"
  5. "strings"
  6. "unicode"
  7. "unicode/utf8"
  8. )
  9. // This is a recursive descent parser for our grammar. Because it can contain nested expressions
  10. // like ${LLAMAS:-${ROCK:-true}} we can't use regular expressions. The simplest possible alternative
  11. // is a recursive parser like this. It parses a chunk and then calls a function to parse that
  12. // further and so on and so forth. It results in a tree of objects that represent the things we've
  13. // parsed (an AST). This means that the logic for how expansions work lives in those objects, and
  14. // the logic for how we go from plain text to parsed objects lives here.
  15. //
// To keep things simple, we do our "lexing" or "scanning" with a few functions at the end of the
// file rather than with a dedicated lexer that emits tokens. This suits the simplicity of the
// format we are parsing.
  19. //
// Below is an EBNF grammar for the language. The parser was built by turning each rule into
// functions and structs of the same name, reading the string bite by bite (peekRune and nextRune).
  22. /*
EscapedBackslash = "\\"
EscapedDollar    = ( "\$" | "$$" )
Identifier       = letter { letter | digit | "_" }
Expansion        = "$" ( Identifier | Brace )
Brace            = "{" Identifier [ Operation ] "}"
Text             = { EscapedBackslash | EscapedDollar | all characters except "$" }
Expression       = { Text | Expansion }
EmptyValue       = ":-" { Expression }
UnsetValue       = "-" { Expression }
Substring        = ":" number [ ":" number ]
Required         = "?" { Expression }
Operation        = EmptyValue | UnsetValue | Substring | Required
  29. */
  30. const (
  31. eof = -1
  32. )
  33. // Parser takes a string and parses out a tree of structs that represent text and Expansions
  34. type Parser struct {
  35. input string // the string we are scanning
  36. pos int // the current position
  37. }
  38. // NewParser returns a new instance of a Parser
  39. func NewParser(str string) *Parser {
  40. return &Parser{
  41. input: str,
  42. pos: 0,
  43. }
  44. }
  45. // Parse expansions out of the internal text and return them as a tree of Expressions
  46. func (p *Parser) Parse() (Expression, error) {
  47. return p.parseExpression()
  48. }
  49. func (p *Parser) parseExpression(stop ...rune) (Expression, error) {
  50. var expr Expression
  51. var stopStr = string(stop)
  52. for {
  53. c := p.peekRune()
  54. if c == eof || strings.ContainsRune(stopStr, c) {
  55. break
  56. }
  57. // check for our escaped characters first, as we assume nothing subsequently is escaped
  58. if strings.HasPrefix(p.input[p.pos:], `\\`) {
  59. p.pos += 2
  60. expr = append(expr, ExpressionItem{Text: `\\`})
  61. continue
  62. } else if strings.HasPrefix(p.input[p.pos:], `\$`) || strings.HasPrefix(p.input[p.pos:], `$$`) {
  63. p.pos += 2
  64. expr = append(expr, ExpressionItem{Text: `$`})
  65. continue
  66. }
  67. // Ignore bash shell expansions
  68. if strings.HasPrefix(p.input[p.pos:], `$(`) {
  69. p.pos += 2
  70. expr = append(expr, ExpressionItem{Text: `$(`})
  71. continue
  72. }
  73. // If we run into a dollar sign and it's not the last char, it's an expansion
  74. if c == '$' && p.pos < (len(p.input)-1) {
  75. expansion, err := p.parseExpansion()
  76. if err != nil {
  77. return nil, err
  78. }
  79. expr = append(expr, ExpressionItem{Expansion: expansion})
  80. continue
  81. }
  82. // nibble a character, otherwise if it's a \ or a $ we can loop
  83. c = p.nextRune()
  84. // Scan as much as we can into text
  85. text := p.scanUntil(func(r rune) bool {
  86. return (r == '$' || r == '\\' || strings.ContainsRune(stopStr, r))
  87. })
  88. expr = append(expr, ExpressionItem{Text: string(c) + text})
  89. }
  90. return expr, nil
  91. }
  92. func (p *Parser) parseExpansion() (Expansion, error) {
  93. if c := p.nextRune(); c != '$' {
  94. return nil, fmt.Errorf("Expected expansion to start with $, got %c", c)
  95. }
  96. // if we have an open brace, this is a brace expansion
  97. if c := p.peekRune(); c == '{' {
  98. return p.parseBraceExpansion()
  99. }
  100. identifier, err := p.scanIdentifier()
  101. if err != nil {
  102. return nil, err
  103. }
  104. return VariableExpansion{Identifier: identifier}, nil
  105. }
  106. func (p *Parser) parseBraceExpansion() (Expansion, error) {
  107. if c := p.nextRune(); c != '{' {
  108. return nil, fmt.Errorf("Expected brace expansion to start with {, got %c", c)
  109. }
  110. identifier, err := p.scanIdentifier()
  111. if err != nil {
  112. return nil, err
  113. }
  114. if c := p.peekRune(); c == '}' {
  115. _ = p.nextRune()
  116. return VariableExpansion{Identifier: identifier}, nil
  117. }
  118. var operator string
  119. var exp Expansion
  120. // Parse an operator, some trickery is needed to handle : vs :-
  121. if op1 := p.nextRune(); op1 == ':' {
  122. if op2 := p.peekRune(); op2 == '-' {
  123. _ = p.nextRune()
  124. operator = ":-"
  125. } else {
  126. operator = ":"
  127. }
  128. } else if op1 == '?' || op1 == '-' {
  129. operator = string(op1)
  130. } else {
  131. return nil, fmt.Errorf("Expected an operator, got %c", op1)
  132. }
  133. switch operator {
  134. case `:-`:
  135. exp, err = p.parseEmptyValueExpansion(identifier)
  136. if err != nil {
  137. return nil, err
  138. }
  139. case `-`:
  140. exp, err = p.parseUnsetValueExpansion(identifier)
  141. if err != nil {
  142. return nil, err
  143. }
  144. case `:`:
  145. exp, err = p.parseSubstringExpansion(identifier)
  146. if err != nil {
  147. return nil, err
  148. }
  149. case `?`:
  150. exp, err = p.parseRequiredExpansion(identifier)
  151. if err != nil {
  152. return nil, err
  153. }
  154. }
  155. if c := p.nextRune(); c != '}' {
  156. return nil, fmt.Errorf("Expected brace expansion to end with }, got %c", c)
  157. }
  158. return exp, nil
  159. }
  160. func (p *Parser) parseEmptyValueExpansion(identifier string) (Expansion, error) {
  161. // parse an expression (text and expansions) up until the end of the brace
  162. expr, err := p.parseExpression('}')
  163. if err != nil {
  164. return nil, err
  165. }
  166. return EmptyValueExpansion{Identifier: identifier, Content: expr}, nil
  167. }
  168. func (p *Parser) parseUnsetValueExpansion(identifier string) (Expansion, error) {
  169. expr, err := p.parseExpression('}')
  170. if err != nil {
  171. return nil, err
  172. }
  173. return UnsetValueExpansion{Identifier: identifier, Content: expr}, nil
  174. }
  175. func (p *Parser) parseSubstringExpansion(identifier string) (Expansion, error) {
  176. offset := p.scanUntil(func(r rune) bool {
  177. return r == ':' || r == '}'
  178. })
  179. offsetInt, err := strconv.Atoi(strings.TrimSpace(offset))
  180. if err != nil {
  181. return nil, fmt.Errorf("Unable to parse offset: %v", err)
  182. }
  183. if c := p.peekRune(); c == '}' {
  184. return SubstringExpansion{Identifier: identifier, Offset: offsetInt}, nil
  185. }
  186. _ = p.nextRune()
  187. length := p.scanUntil(func(r rune) bool {
  188. return r == '}'
  189. })
  190. lengthInt, err := strconv.Atoi(strings.TrimSpace(length))
  191. if err != nil {
  192. return nil, fmt.Errorf("Unable to parse length: %v", err)
  193. }
  194. return SubstringExpansion{Identifier: identifier, Offset: offsetInt, Length: lengthInt, HasLength: true}, nil
  195. }
  196. func (p *Parser) parseRequiredExpansion(identifier string) (Expansion, error) {
  197. expr, err := p.parseExpression('}')
  198. if err != nil {
  199. return nil, err
  200. }
  201. return RequiredExpansion{Identifier: identifier, Message: expr}, nil
  202. }
  203. func (p *Parser) scanUntil(f func(rune) bool) string {
  204. start := p.pos
  205. for int(p.pos) < len(p.input) {
  206. c, size := utf8.DecodeRuneInString(p.input[p.pos:])
  207. if c == utf8.RuneError || f(c) {
  208. break
  209. }
  210. p.pos += size
  211. }
  212. return p.input[start:p.pos]
  213. }
  214. func (p *Parser) scanIdentifier() (string, error) {
  215. if c := p.peekRune(); !unicode.IsLetter(c) {
  216. return "", fmt.Errorf("Expected identifier to start with a letter, got %c", c)
  217. }
  218. var notIdentifierChar = func(r rune) bool {
  219. return (!unicode.IsLetter(r) && !unicode.IsNumber(r) && r != '_')
  220. }
  221. return p.scanUntil(notIdentifierChar), nil
  222. }
  223. func (p *Parser) nextRune() rune {
  224. if int(p.pos) >= len(p.input) {
  225. return eof
  226. }
  227. c, size := utf8.DecodeRuneInString(p.input[p.pos:])
  228. p.pos += size
  229. return c
  230. }
  231. func (p *Parser) peekRune() rune {
  232. if int(p.pos) >= len(p.input) {
  233. return eof
  234. }
  235. c, _ := utf8.DecodeRuneInString(p.input[p.pos:])
  236. return c
  237. }