| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436 |
- package mark
- import (
- "regexp"
- "strings"
- "unicode"
- "unicode/utf8"
- )
- // parse holds the state of the parser.
- type parse struct {
- Nodes []Node
- lex Lexer
- options *Options
- tr *parse
- output string
- peekCount int
- token [3]item // three-token lookahead for parser
- links map[string]*DefLinkNode // Deflink parsing, used RefLinks
- renderFn map[NodeType]RenderFn // Custom overridden fns
- }
- // Return new parser
- func newParse(input string, opts *Options) *parse {
- return &parse{
- lex: lex(input),
- options: opts,
- links: make(map[string]*DefLinkNode),
- renderFn: make(map[NodeType]RenderFn),
- }
- }
- // parse convert the raw text to Nodeparse.
- func (p *parse) parse() {
- Loop:
- for {
- var n Node
- switch t := p.peek(); t.typ {
- case itemEOF, itemError:
- break Loop
- case itemNewLine:
- p.next()
- case itemHr:
- n = p.newHr(p.next().pos)
- case itemHTML:
- t = p.next()
- n = p.newHTML(t.pos, t.val)
- case itemDefLink:
- n = p.parseDefLink()
- case itemHeading, itemLHeading:
- n = p.parseHeading()
- case itemCodeBlock, itemGfmCodeBlock:
- n = p.parseCodeBlock()
- case itemList:
- n = p.parseList()
- case itemTable, itemLpTable:
- n = p.parseTable()
- case itemBlockQuote:
- n = p.parseBlockQuote()
- case itemIndent:
- space := p.next()
- // If it isn't followed by itemText
- if p.peek().typ != itemText {
- continue
- }
- p.backup2(space)
- fallthrough
- // itemText
- default:
- tmp := p.newParagraph(t.pos)
- tmp.Nodes = p.parseText(p.next().val + p.scanLines())
- n = tmp
- }
- if n != nil {
- p.append(n)
- }
- }
- }
- // Root getter
- func (p *parse) root() *parse {
- if p.tr == nil {
- return p
- }
- return p.tr.root()
- }
- // Render parse nodes to the wanted output
- func (p *parse) render() {
- var output string
- for i, node := range p.Nodes {
- // If there's a custom render function, use it instead.
- if fn, ok := p.renderFn[node.Type()]; ok {
- output = fn(node)
- } else {
- output = node.Render()
- }
- p.output += output
- if output != "" && i != len(p.Nodes)-1 {
- p.output += "\n"
- }
- }
- }
- // append new node to nodes-list
- func (p *parse) append(n Node) {
- p.Nodes = append(p.Nodes, n)
- }
- // next returns the next token
- func (p *parse) next() item {
- if p.peekCount > 0 {
- p.peekCount--
- } else {
- p.token[0] = p.lex.nextItem()
- }
- return p.token[p.peekCount]
- }
- // peek returns but does not consume the next token.
- func (p *parse) peek() item {
- if p.peekCount > 0 {
- return p.token[p.peekCount-1]
- }
- p.peekCount = 1
- p.token[0] = p.lex.nextItem()
- return p.token[0]
- }
- // backup backs the input stream tp one token
- func (p *parse) backup() {
- p.peekCount++
- }
- // backup2 backs the input stream up two tokens.
- // The zeroth token is already there.
- func (p *parse) backup2(t1 item) {
- p.token[1] = t1
- p.peekCount = 2
- }
- // parseText
- func (p *parse) parseText(input string) (nodes []Node) {
- // Trim whitespaces that not a line-break
- input = regexp.MustCompile(`(?m)^ +| +(\n|$)`).ReplaceAllStringFunc(input, func(s string) string {
- if reBr.MatchString(s) {
- return s
- }
- return strings.Replace(s, " ", "", -1)
- })
- l := lexInline(input)
- for token := range l.items {
- var node Node
- switch token.typ {
- case itemBr:
- node = p.newBr(token.pos)
- case itemStrong, itemItalic, itemStrike, itemCode:
- node = p.parseEmphasis(token.typ, token.pos, token.val)
- case itemLink, itemAutoLink, itemGfmLink:
- var title, href string
- var text []Node
- if token.typ == itemLink {
- match := reLink.FindStringSubmatch(token.val)
- text = p.parseText(match[1])
- href, title = match[2], match[3]
- } else {
- var match []string
- if token.typ == itemGfmLink {
- match = reGfmLink.FindStringSubmatch(token.val)
- } else {
- match = reAutoLink.FindStringSubmatch(token.val)
- }
- href = match[1]
- text = append(text, p.newText(token.pos, match[1]))
- }
- node = p.newLink(token.pos, title, href, text...)
- case itemImage:
- match := reImage.FindStringSubmatch(token.val)
- node = p.newImage(token.pos, match[3], match[2], match[1])
- case itemRefLink, itemRefImage:
- match := reRefLink.FindStringSubmatch(token.val)
- text, ref := match[1], match[2]
- if ref == "" {
- ref = text
- }
- if token.typ == itemRefLink {
- node = p.newRefLink(token.typ, token.pos, token.val, ref, p.parseText(text))
- } else {
- node = p.newRefImage(token.typ, token.pos, token.val, ref, text)
- }
- case itemHTML:
- node = p.newHTML(token.pos, token.val)
- default:
- node = p.newText(token.pos, token.val)
- }
- nodes = append(nodes, node)
- }
- return nodes
- }
- // parse inline emphasis
- func (p *parse) parseEmphasis(typ itemType, pos Pos, val string) *EmphasisNode {
- var re *regexp.Regexp
- switch typ {
- case itemStrike:
- re = reStrike
- case itemStrong:
- re = reStrong
- case itemCode:
- re = reCode
- case itemItalic:
- re = reItalic
- }
- node := p.newEmphasis(pos, typ)
- match := re.FindStringSubmatch(val)
- text := match[len(match)-1]
- if text == "" {
- text = match[1]
- }
- node.Nodes = p.parseText(text)
- return node
- }
- // parse heading block
- func (p *parse) parseHeading() (node *HeadingNode) {
- token := p.next()
- level := 1
- var text string
- if token.typ == itemHeading {
- match := reHeading.FindStringSubmatch(token.val)
- level, text = len(match[1]), match[2]
- } else {
- match := reLHeading.FindStringSubmatch(token.val)
- // using equal signs for first-level, and dashes for second-level.
- text = match[1]
- if match[2] == "-" {
- level = 2
- }
- }
- node = p.newHeading(token.pos, level, text)
- node.Nodes = p.parseText(text)
- return
- }
- func (p *parse) parseDefLink() *DefLinkNode {
- token := p.next()
- match := reDefLink.FindStringSubmatch(token.val)
- name := strings.ToLower(match[1])
- // name(lowercase), href, title
- n := p.newDefLink(token.pos, name, match[2], match[3])
- // store in links
- links := p.root().links
- if _, ok := links[name]; !ok {
- links[name] = n
- }
- return n
- }
- // parse codeBlock
- func (p *parse) parseCodeBlock() *CodeNode {
- var lang, text string
- token := p.next()
- if token.typ == itemGfmCodeBlock {
- codeStart := reGfmCode.FindStringSubmatch(token.val)
- lang = codeStart[3]
- text = token.val[len(codeStart[0]):]
- } else {
- text = reCodeBlock.trim(token.val, "")
- }
- return p.newCode(token.pos, lang, text)
- }
- func (p *parse) parseBlockQuote() (n *BlockQuoteNode) {
- token := p.next()
- // replacer
- re := regexp.MustCompile(`(?m)^ *> ?`)
- raw := re.ReplaceAllString(token.val, "")
- // TODO(a8m): doesn't work right now with defLink(inside the blockQuote)
- tr := &parse{lex: lex(raw), tr: p}
- tr.parse()
- n = p.newBlockQuote(token.pos)
- n.Nodes = tr.Nodes
- return
- }
- // parse list
- func (p *parse) parseList() *ListNode {
- token := p.next()
- list := p.newList(token.pos, isDigit(token.val))
- Loop:
- for {
- switch token = p.peek(); token.typ {
- case itemLooseItem, itemListItem:
- list.append(p.parseListItem())
- default:
- break Loop
- }
- }
- return list
- }
- // parse listItem
- func (p *parse) parseListItem() *ListItemNode {
- token := p.next()
- item := p.newListItem(token.pos)
- token.val = strings.TrimSpace(token.val)
- if p.isTaskItem(token.val) {
- item.Nodes = p.parseTaskItem(token)
- return item
- }
- tr := &parse{lex: lex(token.val), tr: p}
- tr.parse()
- for _, node := range tr.Nodes {
- // wrap with paragraph only when it's a loose item
- if n, ok := node.(*ParagraphNode); ok && token.typ == itemListItem {
- item.Nodes = append(item.Nodes, n.Nodes...)
- } else {
- item.append(node)
- }
- }
- return item
- }
- // parseTaskItem parses list item as a task item.
- func (p *parse) parseTaskItem(token item) []Node {
- checkbox := p.newCheckbox(token.pos, token.val[1] == 'x')
- token.val = strings.TrimSpace(token.val[3:])
- return append([]Node{checkbox}, p.parseText(token.val)...)
- }
- // isTaskItem tests if the given string is list task item.
- func (p *parse) isTaskItem(s string) bool {
- if len(s) < 5 || s[0] != '[' || (s[1] != 'x' && s[1] != ' ') || s[2] != ']' {
- return false
- }
- return "" != strings.TrimSpace(s[3:])
- }
- // parse table
- func (p *parse) parseTable() *TableNode {
- table := p.newTable(p.next().pos)
- // Align [ None, Left, Right, ... ]
- // Header [ Cells: [ ... ] ]
- // Data: [ Rows: [ Cells: [ ... ] ] ]
- rows := struct {
- Align []AlignType
- Header []item
- Cells [][]item
- }{}
- Loop:
- for i := 0; ; {
- switch token := p.next(); token.typ {
- case itemTableRow:
- i++
- if i > 2 {
- rows.Cells = append(rows.Cells, []item{})
- }
- case itemTableCell:
- // Header
- if i == 1 {
- rows.Header = append(rows.Header, token)
- // Alignment
- } else if i == 2 {
- rows.Align = append(rows.Align, parseAlign(token.val))
- // Data
- } else {
- pos := i - 3
- rows.Cells[pos] = append(rows.Cells[pos], token)
- }
- default:
- p.backup()
- break Loop
- }
- }
- // Tranform to nodes
- table.append(p.parseCells(Header, rows.Header, rows.Align))
- // Table body
- for _, row := range rows.Cells {
- table.append(p.parseCells(Data, row, rows.Align))
- }
- return table
- }
- // parse cells and return new row
- func (p *parse) parseCells(kind int, items []item, align []AlignType) *RowNode {
- var row *RowNode
- for i, item := range items {
- if i == 0 {
- row = p.newRow(item.pos)
- }
- cell := p.newCell(item.pos, kind, align[i])
- cell.Nodes = p.parseText(item.val)
- row.append(cell)
- }
- return row
- }
- // Used to consume lines(itemText) for a continues paragraphs
- func (p *parse) scanLines() (s string) {
- for {
- tkn := p.next()
- if tkn.typ == itemText || tkn.typ == itemIndent {
- s += tkn.val
- } else if tkn.typ == itemNewLine {
- if t := p.peek().typ; t != itemText && t != itemIndent {
- p.backup2(tkn)
- break
- }
- s += tkn.val
- } else {
- p.backup()
- break
- }
- }
- return
- }
- // get align-string and return the align type of it
- func parseAlign(s string) (typ AlignType) {
- sfx, pfx := strings.HasSuffix(s, ":"), strings.HasPrefix(s, ":")
- switch {
- case sfx && pfx:
- typ = Center
- case sfx:
- typ = Right
- case pfx:
- typ = Left
- }
- return
- }
- // test if given string is digit
- func isDigit(s string) bool {
- r, _ := utf8.DecodeRuneInString(s)
- return unicode.IsDigit(r)
- }
|