Browse Source

highlighter: support Fenced Code Block

Le Tan 7 years ago
parent
commit
fbfc6c1dd6

+ 116 - 128
src/hgmarkdownhighlighter.cpp

@@ -193,9 +193,9 @@ void HGMarkdownHighlighter::highlightBlock(const QString &text)
         goto exit;
     }
 
-    // PEG Markdown Highlight does not handle the ``` code block correctly.
     setCurrentBlockState(HighlightBlockState::Normal);
-    highlightCodeBlock(curBlock, text);
+
+    highlightCodeBlock(blockNum, text);
 
     if (currentBlockState() == HighlightBlockState::Normal) {
         if (isVerbatimBlock(curBlock)) {
@@ -211,11 +211,6 @@ void HGMarkdownHighlighter::highlightBlock(const QString &text)
         }
     }
 
-    // PEG Markdown Highlight does not handle links with spaces in the URL.
-    // Links in the URL should be encoded to %20. We just let it be here and won't
-    // fix this.
-    // highlightLinkWithSpacesInURL(text);
-
     if (currentBlockState() != HighlightBlockState::CodeBlock) {
         goto exit;
     }
@@ -377,6 +372,84 @@ void HGMarkdownHighlighter::initVerbatimBlocksFromResult()
     }
 }
 
+void HGMarkdownHighlighter::initFencedCodeBlocksFromResult()
+{   // Rebuilds m_codeBlocks and m_codeBlocksState from the pmh_FENCEDCODEBLOCK elements of the parse result.
+    m_codeBlocks.clear();
+    m_codeBlocksState.clear();
+    if (!result) {   // No parse result: both containers stay empty.
+        return;
+    }
+
+    // Regions keyed by start position, so iterating the map visits them in ascending start order.
+    QMap<int, VElementRegion> regs;
+
+    pmh_element *elem = result[pmh_FENCEDCODEBLOCK];
+    while (elem != NULL) {
+        if (elem->end <= elem->pos) {   // Skip empty or inverted ranges.
+            elem = elem->next;
+            continue;
+        }
+
+        if (!regs.contains(elem->pos)) {   // Keep only the first element seen for a given start position.
+            regs.insert(elem->pos, VElementRegion(elem->pos, elem->end));
+        }
+
+        elem = elem->next;
+    }
+
+    VCodeBlock item;   // Code block currently being collected; reused for every block found.
+    bool inBlock = false;   // NOTE(review): declared outside the region loop, so an unclosed fence carries into the next region - confirm intended.
+    for (auto it = regs.begin(); it != regs.end(); ++it) {
+        // Inclusive range [firstBlock, lastBlock] of document block numbers covered by this region.
+        int firstBlock = document->findBlock(it.value().m_startPos).blockNumber();
+        int lastBlock = document->findBlock(it.value().m_endPos - 1).blockNumber();   // m_endPos - 1: presumably m_endPos is exclusive - TODO confirm.
+
+        QTextBlock block = document->findBlockByNumber(firstBlock);
+        while (block.isValid()) {
+            int blockNumber = block.blockNumber();
+            if (blockNumber > lastBlock) {   // Past the end of this region.
+                break;
+            }
+
+            HighlightBlockState state = HighlightBlockState::Normal;
+            QString text = block.text();
+            if (inBlock) {
+                item.m_text = item.m_text + "\n" + text;   // Accumulate the line (the end fence line is included too).
+                int idx = codeBlockEndExp.indexIn(text);
+                if (idx >= 0) {
+                    // End fence matched: close the block and publish it to m_codeBlocks.
+                    inBlock = false;
+                    state = HighlightBlockState::CodeBlockEnd;
+                    item.m_endBlock = blockNumber;
+                    m_codeBlocks.append(item);
+                } else {
+                    // No end fence on this line: it is content within the code block.
+                    state = HighlightBlockState::CodeBlock;
+                }
+            } else {
+                int idx = codeBlockStartExp.indexIn(text);
+                if (idx >= 0) {
+                    // Start fence matched: begin collecting a new block from this line.
+                    inBlock = true;
+                    state = HighlightBlockState::CodeBlockStart;
+                    item.m_startBlock = blockNumber;
+                    item.m_startPos = block.position();
+                    item.m_text = text;
+                    if (codeBlockStartExp.captureCount() == 2) {   // Second capture group is stored as the language.
+                        item.m_lang = codeBlockStartExp.capturedTexts()[2];   // NOTE(review): m_lang keeps its previous value when this branch is skipped.
+                    }
+                }
+            }
+
+            if (state != HighlightBlockState::Normal) {   // Only fence and content lines are recorded; Normal lines get no entry.
+                m_codeBlocksState.insert(blockNumber, state);
+            }
+
+            block = block.next();
+        }
+    }
+}
+
 void HGMarkdownHighlighter::initHeaderRegionsFromResult()
 {
     // From Qt5.7, the capacity is preserved.
@@ -447,61 +520,46 @@ void HGMarkdownHighlighter::initBlockHighlihgtOne(unsigned long pos,
     }
 }
 
-void HGMarkdownHighlighter::highlightCodeBlock(const QTextBlock &p_block, const QString &p_text)
+void HGMarkdownHighlighter::highlightCodeBlock(int p_blockNumber, const QString &p_text)   // Applies the state recorded for p_blockNumber in m_codeBlocksState, if any.
 {
-    VTextBlockData *blockData = currentBlockData();
-    Q_ASSERT(blockData);
+    auto it = m_codeBlocksState.find(p_blockNumber);   // Blocks without an entry are left untouched.
+    if (it != m_codeBlocksState.end()) {
+        VTextBlockData *blockData = currentBlockData();
+        Q_ASSERT(blockData);
+
+        HighlightBlockState state = it.value();
+        // Set code block indentation: the start fence measures it, later lines copy it from the previous block.
+        switch (state) {
+        case HighlightBlockState::CodeBlockStart:
+        {
+            int index = codeBlockStartExp.indexIn(p_text);   // Re-run the match so capturedTexts() refers to this line.
+            Q_ASSERT(index >= 0);
+            blockData->setCodeBlockIndentation(codeBlockStartExp.capturedTexts()[1].size());   // Length of capture group 1 is used as the indentation.
+            break;
+        }
+
+        case HighlightBlockState::CodeBlock:
+            V_FALLTHROUGH;
+        case HighlightBlockState::CodeBlockEnd:
+        {
+            int startLeadingSpaces = 0;   // Used as-is when the previous block has no data.
+            VTextBlockData *preBlockData = previousBlockData();
+            if (preBlockData) {
+                startLeadingSpaces = preBlockData->getCodeBlockIndentation();
+            }
 
-    int length = 0;
-    int index = -1;
-    int preState = previousBlockState();
-    int state = HighlightBlockState::Normal;
-
-    if (preState != HighlightBlockState::CodeBlock
-        && preState != HighlightBlockState::CodeBlockStart) {
-        // Need to find a new code block start.
-        index = codeBlockStartExp.indexIn(p_text);
-        if (index >= 0 && !isVerbatimBlock(p_block)) {
-            // Start a new code block.
-            length = p_text.length();
-            state = HighlightBlockState::CodeBlockStart;
-
-            // The leading spaces of code block start and end must be identical.
-            int startLeadingSpaces = codeBlockStartExp.capturedTexts()[1].size();
             blockData->setCodeBlockIndentation(startLeadingSpaces);
-        } else {
-            // A normal block.
-            blockData->setCodeBlockIndentation(-1);
-            return;
-        }
-    } else {
-        // Need to find a code block end.
-        int startLeadingSpaces = 0;
-        VTextBlockData *preBlockData = previousBlockData();
-        if (preBlockData) {
-            startLeadingSpaces = preBlockData->getCodeBlockIndentation();
+            break;
         }
 
-        index = codeBlockEndExp.indexIn(p_text);
-
-        // The closing ``` should have the same indentation as the open ```.
-        if (index >= 0
-            && startLeadingSpaces == codeBlockEndExp.capturedTexts()[1].size()) {
-            // End of code block.
-            length = p_text.length();
-            state = HighlightBlockState::CodeBlockEnd;
-        } else {
-            // Within code block.
-            index = 0;
-            length = p_text.length();
-            state = HighlightBlockState::CodeBlock;
+        default:
+            Q_ASSERT(false);   // initFencedCodeBlocksFromResult() only inserts the three code-block states handled above.
+            break;
         }
 
-        blockData->setCodeBlockIndentation(startLeadingSpaces);
+        // Publish the recorded state as the current block state.
+        setCurrentBlockState(state);
     }
-
-    setCurrentBlockState(state);
-    setFormat(index, length, m_codeBlockFormat);
 }
 
 static bool intersect(const QList<QPair<int, int>> &p_indices, int &p_start, int &p_end)
@@ -727,30 +785,6 @@ void HGMarkdownHighlighter::highlightCodeBlockColorColumn(const QString &p_text)
     setFormat(cc - 1, 1, m_colorColumnFormat);
 }
 
-void HGMarkdownHighlighter::highlightLinkWithSpacesInURL(const QString &p_text)
-{
-    if (currentBlockState() == HighlightBlockState::CodeBlock) {
-        return;
-    }
-
-    // TODO: should select links with spaces in URL.
-    QRegExp regExp("[\\!]?\\[[^\\]]*\\]\\(([^\\n\\)]+)\\)");
-    int index = regExp.indexIn(p_text);
-    while (index >= 0) {
-        Q_ASSERT(regExp.captureCount() == 1);
-        int length = regExp.matchedLength();
-        QString capturedText = regExp.capturedTexts()[1];
-        if (capturedText.contains(' ')) {
-            if (p_text[index] == '!' && m_imageFormat.isValid()) {
-                setFormat(index, length, m_imageFormat);
-            } else if (m_linkFormat.isValid()) {
-                setFormat(index, length, m_linkFormat);
-            }
-        }
-        index = regExp.indexIn(p_text, index + length);
-    }
-}
-
 void HGMarkdownHighlighter::parse()
 {
     if (!parsing.testAndSetRelaxed(0, 1)) {
@@ -779,6 +813,8 @@ void HGMarkdownHighlighter::parse()
 
     initVerbatimBlocksFromResult();
 
+    initFencedCodeBlocksFromResult();
+
     initInlineCodeRegionsFromResult();
 
     initBoldItalicRegionsFromResult();
@@ -872,56 +908,8 @@ bool HGMarkdownHighlighter::updateCodeBlocks()
         m_codeBlockHighlights[i].clear();
     }
 
-    QVector<VCodeBlock> codeBlocks;
-
-    VCodeBlock item;
-    bool inBlock = false;
-    int startLeadingSpaces = -1;
-
-    // Only handle complete codeblocks.
-    QTextBlock block = document->firstBlock();
-    while (block.isValid()) {
-        if (!inBlock && isVerbatimBlock(block)) {
-            block = block.next();
-            continue;
-        }
-
-        QString text = block.text();
-        if (inBlock) {
-            item.m_text = item.m_text + "\n" + text;
-            int idx = codeBlockEndExp.indexIn(text);
-            if (idx >= 0 && codeBlockEndExp.capturedTexts()[1].size() == startLeadingSpaces) {
-                // End block.
-                inBlock = false;
-                item.m_endBlock = block.blockNumber();
-
-                // See if it is a code block inside HTML comment.
-                if (!isBlockInsideCommentRegion(block)) {
-                    qDebug() << "add one code block in lang" << item.m_lang;
-                    codeBlocks.append(item);
-                }
-            }
-        } else {
-            int idx = codeBlockStartExp.indexIn(text);
-            if (idx >= 0) {
-                // Start block.
-                inBlock = true;
-                item.m_startBlock = block.blockNumber();
-                item.m_startPos = block.position();
-                item.m_text = text;
-                if (codeBlockStartExp.captureCount() == 2) {
-                    item.m_lang = codeBlockStartExp.capturedTexts()[2];
-                }
-
-                startLeadingSpaces = codeBlockStartExp.capturedTexts()[1].size();
-            }
-        }
-
-        block = block.next();
-    }
-
-    m_numOfCodeBlockHighlightsToRecv = codeBlocks.size();
-    emit codeBlocksUpdated(codeBlocks);
+    m_numOfCodeBlockHighlightsToRecv = m_codeBlocks.size();
+    emit codeBlocksUpdated(m_codeBlocks);
     return m_numOfCodeBlockHighlightsToRecv > 0;
 }
 

+ 11 - 7
src/hgmarkdownhighlighter.h

@@ -9,6 +9,7 @@
 #include <QString>
 
 #include "vtextblockdata.h"
+#include "vconstants.h"
 
 extern "C" {
 #include <pmh_parser.h>
@@ -298,9 +299,14 @@ private:
     QVector<VElementRegion> m_headerRegions;
 
     // All verbatim blocks (by parser) number.
-    // It may be a code block inside fenced code block.
     QSet<int> m_verbatimBlocks;
 
+    // All fenced code blocks.
+    QVector<VCodeBlock> m_codeBlocks;
+
+    // Indexed by block number.
+    QHash<int, HighlightBlockState> m_codeBlocksState;
+
     // Indexed by block number.
     QHash<int, HeaderBlockInfo> m_headerBlocks;
 
@@ -343,15 +349,10 @@ private:
 
     void resizeBuffer(int newCap);
 
-    void highlightCodeBlock(const QTextBlock &p_block, const QString &p_text);
+    void highlightCodeBlock(int p_blockNumber, const QString &p_text);
 
     void highlightMathJax(const QTextBlock &p_block, const QString &p_text);
 
-    // Highlight links using regular expression.
-    // PEG Markdown Highlight treat URLs with spaces illegal. This function is
-    // intended to complement this.
-    void highlightLinkWithSpacesInURL(const QString &p_text);
-
     void parse();
 
     void parseInternal();
@@ -382,6 +383,9 @@ private:
     // Fetch all the verbatim blocks from parsing result.
     void initVerbatimBlocksFromResult();
 
+    // Fetch all the fenced code blocks from parsing result.
+    void initFencedCodeBlocksFromResult();
+
     // Fetch all the inline code regions from parsing result.
     void initInlineCodeRegionsFromResult();
 

+ 4 - 0
src/markdownhighlighterdata.h

@@ -0,0 +1,4 @@
+#ifndef MARKDOWNHIGHLIGHTERDATA_H
+#define MARKDOWNHIGHLIGHTERDATA_H
+
+#endif // MARKDOWNHIGHLIGHTERDATA_H

+ 4 - 0
src/resources/themes/v_detorte/v_detorte.mdhl

@@ -134,6 +134,10 @@ foreground: 9e9e9e
 VERBATIM
 foreground: 98c379
 font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
+
+FENCEDCODEBLOCK
+foreground: 98c379
+font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
 # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
 # The last occurrence of the same attribute takes effect
 # Could specify multiple attribute in one line

+ 4 - 0
src/resources/themes/v_moonlight/v_moonlight.mdhl

@@ -132,6 +132,10 @@ foreground: 6e7686
 VERBATIM
 foreground: 98c379
 font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
+
+FENCEDCODEBLOCK
+foreground: 98c379
+font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
 # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
 # The last occurrence of the same attribute takes effect
 # Could specify multiple attributes in one line

+ 4 - 0
src/resources/themes/v_native/v_native.mdhl

@@ -129,6 +129,10 @@ foreground: 93a1a1
 VERBATIM
 foreground: 673ab7
 font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
+
+FENCEDCODEBLOCK
+foreground: 673ab7
+font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
 # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
 # The last occurrence of the same attribute takes effect
 hljs-comment: 6c6c6c

+ 4 - 0
src/resources/themes/v_pure/v_pure.mdhl

@@ -130,6 +130,10 @@ foreground: 93a1a1
 VERBATIM
 foreground: 673ab7
 font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
+
+FENCEDCODEBLOCK
+foreground: 673ab7
+font-family: Consolas, Monaco, Andale Mono, Monospace, Courier New
 # [VNote] Codeblock style from HighlightJS (bold, italic, underlined, strikeout, color)
 # The last occurrence of the same attribute takes effect
 # Could specify multiple attributes in one line

+ 15 - 1
src/vstyleparser.cpp

@@ -118,6 +118,8 @@ void VStyleParser::parseMarkdownStyle(const QString &styleStr)
     if (markdownStyles) {
         pmh_free_style_collection(markdownStyles);
     }
+
+    // markdownStyles is not indexed by element type.
     markdownStyles = pmh_parse_styles(styleStr.toLocal8Bit().data(),
                                       &markdownStyleErrorCB, this);
 }
@@ -131,6 +133,7 @@ QVector<HighlightingStyle> VStyleParser::fetchMarkdownStyles(const QFont &baseFo
         if (!attr) {
             continue;
         }
+
         HighlightingStyle style;
         style.type = attr->lang_element_type;
         style.format = QTextCharFormatFromAttrs(attr, baseFont);
@@ -143,7 +146,18 @@ QHash<QString, QTextCharFormat> VStyleParser::fetchCodeBlockStyles(const QFont &
 {
     QHash<QString, QTextCharFormat> styles;
 
-    pmh_style_attribute *attrs = markdownStyles->element_styles[pmh_VERBATIM];
+    pmh_style_attribute *attrs = NULL;
+    for (int i = 0; i < pmh_NUM_LANG_TYPES; ++i) {
+        pmh_style_attribute *tmp = markdownStyles->element_styles[i];
+        if (!tmp) {
+            continue;
+        }
+
+        if (tmp->lang_element_type == pmh_FENCEDCODEBLOCK) {
+            attrs = tmp;
+            break;
+        }
+    }
 
     // First set up the base format.
     QTextCharFormat baseFormat = QTextCharFormatFromAttrs(attrs, p_baseFont);

+ 11 - 4
src/vtextdocumentlayout.cpp

@@ -74,10 +74,15 @@ void VTextDocumentLayout::blockRangeFromRect(const QRectF &p_rect,
         return;
     }
 
+    if (document()->blockCount() != m_blocks.size()) {
+        p_first = -1;
+        p_last = -1;
+        return;
+    }
+
     p_first = -1;
     p_last = m_blocks.size() - 1;
     int y = p_rect.y();
-    Q_ASSERT(document()->blockCount() == m_blocks.size());
     QTextBlock block = document()->firstBlock();
     while (block.isValid()) {
         const BlockInfo &info = m_blocks[block.blockNumber()];
@@ -121,9 +126,11 @@ void VTextDocumentLayout::blockRangeFromRectBS(const QRectF &p_rect,
         return;
     }
 
-    Q_ASSERT(document()->blockCount() == m_blocks.size());
-
-    p_first = findBlockByPosition(p_rect.topLeft());
+    if (document()->blockCount() != m_blocks.size()) {
+        p_first = -1;
+    } else {
+        p_first = findBlockByPosition(p_rect.topLeft());
+    }
 
     if (p_first == -1) {
         p_last = -1;