parser.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. package glob
  2. import (
  3. "errors"
  4. "fmt"
  5. "unicode/utf8"
  6. )
  7. type node interface {
  8. children() []node
  9. append(node)
  10. }
  11. // todo may be split it into another package
  12. type lexerIface interface {
  13. nextItem() item
  14. }
  15. type nodeImpl struct {
  16. desc []node
  17. }
  18. func (n *nodeImpl) append(c node) {
  19. n.desc = append(n.desc, c)
  20. }
  21. func (n *nodeImpl) children() []node {
  22. return n.desc
  23. }
  24. type nodeList struct {
  25. nodeImpl
  26. not bool
  27. chars string
  28. }
  29. type nodeRange struct {
  30. nodeImpl
  31. not bool
  32. lo, hi rune
  33. }
  34. type nodeText struct {
  35. nodeImpl
  36. text string
  37. }
  38. type nodePattern struct{ nodeImpl }
  39. type nodeAny struct{ nodeImpl }
  40. type nodeSuper struct{ nodeImpl }
  41. type nodeSingle struct{ nodeImpl }
  42. type nodeAnyOf struct{ nodeImpl }
  43. type tree struct {
  44. root node
  45. current node
  46. path []node
  47. }
  48. func (t *tree) enter(c node) {
  49. if t.root == nil {
  50. t.root = c
  51. t.current = c
  52. return
  53. }
  54. t.current.append(c)
  55. t.path = append(t.path, c)
  56. t.current = c
  57. }
  58. func (t *tree) leave() {
  59. if len(t.path)-1 <= 0 {
  60. t.current = t.root
  61. t.path = nil
  62. return
  63. }
  64. t.path = t.path[:len(t.path)-1]
  65. t.current = t.path[len(t.path)-1]
  66. }
  67. type parseFn func(*tree, lexerIface) (parseFn, error)
  68. func parse(lexer lexerIface) (*nodePattern, error) {
  69. var parser parseFn
  70. root := &nodePattern{}
  71. tree := &tree{}
  72. tree.enter(root)
  73. for parser = parserMain; ; {
  74. next, err := parser(tree, lexer)
  75. if err != nil {
  76. return nil, err
  77. }
  78. if next == nil {
  79. break
  80. }
  81. parser = next
  82. }
  83. return root, nil
  84. }
  85. func parserMain(tree *tree, lexer lexerIface) (parseFn, error) {
  86. for stop := false; !stop; {
  87. item := lexer.nextItem()
  88. switch item.t {
  89. case item_eof:
  90. stop = true
  91. continue
  92. case item_error:
  93. return nil, errors.New(item.s)
  94. case item_text:
  95. tree.current.append(&nodeText{text: item.s})
  96. return parserMain, nil
  97. case item_any:
  98. tree.current.append(&nodeAny{})
  99. return parserMain, nil
  100. case item_super:
  101. tree.current.append(&nodeSuper{})
  102. return parserMain, nil
  103. case item_single:
  104. tree.current.append(&nodeSingle{})
  105. return parserMain, nil
  106. case item_range_open:
  107. return parserRange, nil
  108. case item_terms_open:
  109. tree.enter(&nodeAnyOf{})
  110. tree.enter(&nodePattern{})
  111. return parserMain, nil
  112. case item_separator:
  113. tree.leave()
  114. tree.enter(&nodePattern{})
  115. return parserMain, nil
  116. case item_terms_close:
  117. tree.leave()
  118. tree.leave()
  119. return parserMain, nil
  120. default:
  121. return nil, fmt.Errorf("unexpected token: %s", item)
  122. }
  123. }
  124. return nil, nil
  125. }
  126. func parserRange(tree *tree, lexer lexerIface) (parseFn, error) {
  127. var (
  128. not bool
  129. lo rune
  130. hi rune
  131. chars string
  132. )
  133. for {
  134. item := lexer.nextItem()
  135. switch item.t {
  136. case item_eof:
  137. return nil, errors.New("unexpected end")
  138. case item_error:
  139. return nil, errors.New(item.s)
  140. case item_not:
  141. not = true
  142. case item_range_lo:
  143. r, w := utf8.DecodeRuneInString(item.s)
  144. if len(item.s) > w {
  145. return nil, fmt.Errorf("unexpected length of lo character")
  146. }
  147. lo = r
  148. case item_range_between:
  149. //
  150. case item_range_hi:
  151. r, w := utf8.DecodeRuneInString(item.s)
  152. if len(item.s) > w {
  153. return nil, fmt.Errorf("unexpected length of lo character")
  154. }
  155. hi = r
  156. if hi < lo {
  157. return nil, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo))
  158. }
  159. case item_text:
  160. chars = item.s
  161. case item_range_close:
  162. isRange := lo != 0 && hi != 0
  163. isChars := chars != ""
  164. if isChars == isRange {
  165. return nil, fmt.Errorf("could not parse range")
  166. }
  167. if isRange {
  168. tree.current.append(&nodeRange{
  169. lo: lo,
  170. hi: hi,
  171. not: not,
  172. })
  173. } else {
  174. tree.current.append(&nodeList{
  175. chars: chars,
  176. not: not,
  177. })
  178. }
  179. return parserMain, nil
  180. }
  181. }
  182. }