vcodeblockhighlighthelper.cpp 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. #include "vcodeblockhighlighthelper.h"
  2. #include <QDebug>
  3. #include <QStringList>
  4. #include "vdocument.h"
  5. #include "utils/vutils.h"
  6. #include "pegmarkdownhighlighter.h"
  7. VCodeBlockHighlightHelper::VCodeBlockHighlightHelper(PegMarkdownHighlighter *p_highlighter,
  8. VDocument *p_vdoc,
  9. MarkdownConverterType p_type)
  10. : QObject(p_highlighter),
  11. m_highlighter(p_highlighter),
  12. m_vdocument(p_vdoc),
  13. m_type(p_type),
  14. m_timeStamp(0)
  15. {
  16. connect(m_highlighter, &PegMarkdownHighlighter::codeBlocksUpdated,
  17. this, &VCodeBlockHighlightHelper::handleCodeBlocksUpdated);
  18. connect(m_vdocument, &VDocument::textHighlighted,
  19. this, &VCodeBlockHighlightHelper::handleTextHighlightResult);
  20. // Web side is ready for code block highlight.
  21. connect(m_vdocument, &VDocument::readyToHighlightText,
  22. m_highlighter, &PegMarkdownHighlighter::updateHighlight);
  23. }
  24. QString VCodeBlockHighlightHelper::unindentCodeBlock(const QString &p_text)
  25. {
  26. if (p_text.isEmpty()) {
  27. return p_text;
  28. }
  29. QStringList lines = p_text.split('\n');
  30. Q_ASSERT(lines[0].trimmed().startsWith("```") || lines[0].trimmed().startsWith("~~~"));
  31. Q_ASSERT(lines.size() > 1);
  32. QRegExp regExp("(^\\s*)");
  33. regExp.indexIn(lines[0]);
  34. V_ASSERT(regExp.captureCount() == 1);
  35. int nrSpaces = regExp.capturedTexts()[1].size();
  36. if (nrSpaces == 0) {
  37. return p_text;
  38. }
  39. QString res = lines[0].right(lines[0].size() - nrSpaces);
  40. for (int i = 1; i < lines.size(); ++i) {
  41. const QString &line = lines[i];
  42. int idx = 0;
  43. while (idx < nrSpaces && idx < line.size() && line[idx].isSpace()) {
  44. ++idx;
  45. }
  46. res = res + "\n" + line.right(line.size() - idx);
  47. }
  48. return res;
  49. }
  50. void VCodeBlockHighlightHelper::handleCodeBlocksUpdated(TimeStamp p_timeStamp,
  51. const QVector<VCodeBlock> &p_codeBlocks)
  52. {
  53. if (!m_vdocument->isReadyToHighlight()) {
  54. // Immediately return empty results.
  55. QVector<HLUnitPos> emptyRes;
  56. for (int i = 0; i < p_codeBlocks.size(); ++i) {
  57. updateHighlightResults(p_timeStamp, 0, emptyRes);
  58. }
  59. return;
  60. }
  61. m_timeStamp = p_timeStamp;
  62. m_codeBlocks = p_codeBlocks;
  63. for (int i = 0; i < m_codeBlocks.size(); ++i) {
  64. const VCodeBlock &block = m_codeBlocks[i];
  65. auto it = m_cache.find(block.m_text);
  66. if (it != m_cache.end()) {
  67. // Hit cache.
  68. qDebug() << "code block highlight hit cache" << p_timeStamp << i;
  69. it.value().m_timeStamp = p_timeStamp;
  70. updateHighlightResults(p_timeStamp, block.m_startPos, it.value().m_units);
  71. } else {
  72. QString unindentedText = unindentCodeBlock(block.m_text);
  73. m_vdocument->highlightTextAsync(unindentedText, i, p_timeStamp);
  74. }
  75. }
  76. }
  77. void VCodeBlockHighlightHelper::handleTextHighlightResult(const QString &p_html,
  78. int p_id,
  79. unsigned long long p_timeStamp)
  80. {
  81. // Abandon obsolete result.
  82. if (m_timeStamp != p_timeStamp) {
  83. return;
  84. }
  85. parseHighlightResult(p_timeStamp, p_id, p_html);
  86. }
  87. static void revertEscapedHtml(QString &p_html)
  88. {
  89. p_html.replace("&gt;", ">").replace("&lt;", "<").replace("&amp;", "&");
  90. }
  91. // Search @p_tokenStr in @p_text from p_index. Spaces after `\n` will not make
  92. // a difference in the match. The matched range will be returned as
  93. // [@p_start, @p_end]. Update @p_index to @p_end + 1.
  94. // Set @p_start and @p_end to -1 to indicate mismatch.
  95. static void matchTokenRelaxed(const QString &p_text, const QString &p_tokenStr,
  96. int &p_index, int &p_start, int &p_end)
  97. {
  98. QString regStr = QRegExp::escape(p_tokenStr);
  99. // Remove the leading spaces.
  100. int nonSpaceIdx = 0;
  101. while (nonSpaceIdx < regStr.size() && regStr[nonSpaceIdx].isSpace()) {
  102. ++nonSpaceIdx;
  103. }
  104. if (nonSpaceIdx > 0 && nonSpaceIdx < regStr.size()) {
  105. regStr.remove(0, nonSpaceIdx);
  106. }
  107. // Do not replace the ending '\n'.
  108. regStr.replace(QRegExp("\n(?!$)"), "\\s+");
  109. QRegExp regExp(regStr);
  110. p_start = p_text.indexOf(regExp, p_index);
  111. if (p_start == -1) {
  112. p_end = -1;
  113. return;
  114. }
  115. p_end = p_start + regExp.matchedLength() - 1;
  116. p_index = p_end + 1;
  117. }
  118. // For now, we could only handle code blocks outside the list.
  119. void VCodeBlockHighlightHelper::parseHighlightResult(TimeStamp p_timeStamp,
  120. int p_idx,
  121. const QString &p_html)
  122. {
  123. const VCodeBlock &block = m_codeBlocks.at(p_idx);
  124. int startPos = block.m_startPos;
  125. QString text = block.m_text;
  126. QVector<HLUnitPos> hlUnits;
  127. bool failed = true;
  128. QXmlStreamReader xml(p_html);
  129. // Must have a fenced line at the front.
  130. // textIndex is the start index in the code block text to search for.
  131. int textIndex = text.indexOf('\n');
  132. if (textIndex == -1) {
  133. goto exit;
  134. }
  135. ++textIndex;
  136. if (xml.readNextStartElement()) {
  137. if (xml.name() != "pre") {
  138. goto exit;
  139. }
  140. if (!xml.readNextStartElement()) {
  141. goto exit;
  142. }
  143. if (xml.name() != "code") {
  144. goto exit;
  145. }
  146. while (xml.readNext()) {
  147. if (xml.isCharacters()) {
  148. // Revert the HTML escape to match.
  149. QString tokenStr = xml.text().toString();
  150. revertEscapedHtml(tokenStr);
  151. int start, end;
  152. matchTokenRelaxed(text, tokenStr, textIndex, start, end);
  153. if (start == -1) {
  154. failed = true;
  155. goto exit;
  156. }
  157. } else if (xml.isStartElement()) {
  158. if (xml.name() != "span") {
  159. failed = true;
  160. goto exit;
  161. }
  162. if (!parseSpanElement(xml, text, textIndex, hlUnits)) {
  163. failed = true;
  164. goto exit;
  165. }
  166. } else if (xml.isEndElement()) {
  167. if (xml.name() != "code" && xml.name() != "pre") {
  168. failed = true;
  169. } else {
  170. failed = false;
  171. }
  172. goto exit;
  173. } else {
  174. failed = true;
  175. goto exit;
  176. }
  177. }
  178. }
  179. exit:
  180. // Pass result back to highlighter.
  181. // Abandon obsolete result.
  182. if (m_timeStamp != p_timeStamp) {
  183. return;
  184. }
  185. if (xml.hasError() || failed) {
  186. qWarning() << "fail to parse highlighted result"
  187. << "stamp:" << p_timeStamp << "index:" << p_idx << p_html;
  188. hlUnits.clear();
  189. }
  190. // Add it to cache.
  191. addToHighlightCache(text, p_timeStamp, hlUnits);
  192. updateHighlightResults(p_timeStamp, startPos, hlUnits);
  193. }
  194. void VCodeBlockHighlightHelper::updateHighlightResults(TimeStamp p_timeStamp,
  195. int p_startPos,
  196. QVector<HLUnitPos> p_units)
  197. {
  198. for (int i = 0; i < p_units.size(); ++i) {
  199. p_units[i].m_position += p_startPos;
  200. }
  201. // We need to call this function anyway to trigger the rehighlight.
  202. m_highlighter->setCodeBlockHighlights(p_timeStamp, p_units);
  203. }
  204. bool VCodeBlockHighlightHelper::parseSpanElement(QXmlStreamReader &p_xml,
  205. const QString &p_text,
  206. int &p_index,
  207. QVector<HLUnitPos> &p_units)
  208. {
  209. int unitStart = p_index;
  210. QString style = p_xml.attributes().value("class").toString();
  211. while (p_xml.readNext()) {
  212. if (p_xml.isCharacters()) {
  213. // Revert the HTML escape to match.
  214. QString tokenStr = p_xml.text().toString();
  215. revertEscapedHtml(tokenStr);
  216. int start, end;
  217. matchTokenRelaxed(p_text, tokenStr, p_index, start, end);
  218. if (start == -1) {
  219. return false;
  220. }
  221. } else if (p_xml.isStartElement()) {
  222. if (p_xml.name() != "span") {
  223. return false;
  224. }
  225. // Sub-span.
  226. if (!parseSpanElement(p_xml, p_text, p_index, p_units)) {
  227. return false;
  228. }
  229. } else if (p_xml.isEndElement()) {
  230. if (p_xml.name() != "span") {
  231. return false;
  232. }
  233. // Got a complete span. Use relative position here.
  234. HLUnitPos unit(unitStart, p_index - unitStart, style);
  235. p_units.append(unit);
  236. return true;
  237. } else {
  238. return false;
  239. }
  240. }
  241. return false;
  242. }
  243. void VCodeBlockHighlightHelper::addToHighlightCache(const QString &p_text,
  244. TimeStamp p_timeStamp,
  245. const QVector<HLUnitPos> &p_units)
  246. {
  247. const int c_maxEntries = 100;
  248. const TimeStamp c_maxTimeStampSpan = 3;
  249. if (m_cache.size() >= c_maxEntries) {
  250. // Remove the oldest one.
  251. TimeStamp ts = p_timeStamp - c_maxTimeStampSpan;
  252. for (auto it = m_cache.begin(); it != m_cache.end();) {
  253. if (it.value().m_timeStamp < ts) {
  254. it = m_cache.erase(it);
  255. } else {
  256. ++it;
  257. }
  258. }
  259. }
  260. m_cache.insert(p_text, HLResult(p_timeStamp, p_units));
  261. }