Просмотр исходного кода

MdEditor: support pasting HTML as converted Markdown text via Turndown

Le Tan 7 лет назад
Родитель
Сommit
dc1f1c4535

+ 1 - 0
README.md

@@ -206,6 +206,7 @@ In VNote, almost everything is configurable, such as background color, font, and
 - [flowchart.js](https://github.com/adrai/flowchart.js) (MIT License)
 - [PlantUML](http://plantuml.com/) (MIT License)
 - [dom-to-image](https://github.com/tsayen/dom-to-image) (MIT License)
+- [turndown](https://github.com/domchristie/turndown) (MIT License)
 
 # License
 VNote is licensed under the [MIT license](http://opensource.org/licenses/MIT).

+ 1 - 0
README_zh.md

@@ -207,6 +207,7 @@ VNote中,几乎一切都是可以定制的,例如背景颜色、字体以及
 - [flowchart.js](https://github.com/adrai/flowchart.js) (MIT License)
 - [PlantUML](http://plantuml.com/) (MIT License)
 - [dom-to-image](https://github.com/tsayen/dom-to-image) (MIT License)
+- [turndown](https://github.com/domchristie/turndown) (MIT License)
 
 # 代码许可
 VNote使用[MIT许可](http://opensource.org/licenses/MIT)。

+ 112 - 0
src/resources/markdown_template.js

@@ -171,6 +171,10 @@ new QWebChannel(qt.webChannelTransport,
             content.noticeReadyToHighlightText();
         }
 
+        if (typeof htmlToText == "function") {
+            content.requestHtmlToText.connect(htmlToText);
+        }
+
         if (typeof textToHtml == "function") {
             content.requestTextToHtml.connect(textToHtml);
             content.noticeReadyToTextToHtml();
@@ -1465,3 +1469,111 @@ var setPreviewContent = function(lang, html) {
         previewDiv.className = '';
     }
 };
+
+var htmlToText = function(identifier, id, timeStamp, html) {
+    var splitString = function(str) {
+        var result = { leadingSpaces: '',
+                       content: '',
+                       trailingSpaces: ''
+                     };
+        if (!str) {
+            return result;
+        }
+
+        var lRe = /^\s+/;
+        var ret = lRe.exec(str);
+        if (ret) {
+            result.leadingSpaces = ret[0];
+            if (result.leadingSpaces.length == str.length) {
+                return result;
+            }
+        }
+
+        var tRe = /\s+$/;
+        ret = tRe.exec(str);
+        if (ret) {
+            result.trailingSpaces = ret[0];
+        }
+
+        result.content = str.slice(result.leadingSpaces.length,
+                                   str.length - result.trailingSpaces.length);
+        return result;
+    };
+
+    var gfm = turndownPluginGfm.gfm
+    var ts = new TurndownService({ headingStyle: 'atx',
+                                   bulletListMarker: '-',
+                                   emDelimiter: '*',
+                                   hr: '***',
+                                   codeBlockStyle: 'fenced',
+                                   blankReplacement: function(content, node) {
+                                       if (node.nodeName == 'SPAN') {
+                                           return content;
+                                       }
+
+                                       return node.isBlock ? '\n\n' : ''
+                                   }
+                                 });
+    ts.use(gfm);
+    ts.addRule('emspan', {
+        filter: 'span',
+        replacement: function(content, node, options) {
+            if (node.style.fontWeight == 'bold') {
+                var con = splitString(content);
+                if (!con.content) {
+                    return content;
+                }
+
+                return con.leadingSpaces + options.strongDelimiter
+                       + con.content
+                       + options.strongDelimiter + con.trailingSpaces;
+            } else if (node.style.fontStyle == 'italic') {
+                var con = splitString(content);
+                if (!con.content) {
+                    return content;
+                }
+
+                return con.leadingSpaces + options.emDelimiter
+                       + con.content
+                       + options.emDelimiter + con.trailingSpaces;
+            } else {
+                return content;
+            }
+        }
+    });
+    ts.addRule('mark', {
+        filter: 'mark',
+        replacement: function(content, node, options) {
+            return '<mark>' + content + '</mark>';
+        }
+    });
+    ts.addRule('emphasis_fix', {
+        filter: ['em', 'i'],
+        replacement: function (content, node, options) {
+            var con = splitString(content);
+            if (!con.content) {
+                return content;
+            }
+
+            return con.leadingSpaces + options.emDelimiter
+                   + con.content
+                   + options.emDelimiter + con.trailingSpaces;
+        }
+    });
+    ts.addRule('strong_fix', {
+        filter: ['strong', 'b'],
+        replacement: function (content, node, options) {
+            var con = splitString(content);
+            if (!con.content) {
+                return content;
+            }
+
+            return con.leadingSpaces + options.strongDelimiter
+                   + con.content
+                   + options.strongDelimiter + con.trailingSpaces;
+        }
+    });
+
+    var markdown = ts.turndown(html);
+    content.htmlToTextCB(identifier, id, timeStamp, markdown);
+};

+ 7 - 0
src/utils/turndown/README.md

@@ -0,0 +1,7 @@
+# [turndown](https://github.com/domchristie/turndown)
+v4.0.2  
+Dom Christie
+
+# [turndown-plugin-gfm](https://github.com/domchristie/turndown-plugin-gfm)
+v1.0.2  
+Dom Christie

+ 165 - 0
src/utils/turndown/turndown-plugin-gfm.js

@@ -0,0 +1,165 @@
+var turndownPluginGfm = (function (exports) {
+'use strict';
+
+var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
+
+function highlightedCodeBlock (turndownService) {
+  turndownService.addRule('highlightedCodeBlock', {
+    filter: function (node) {
+      var firstChild = node.firstChild;
+      return (
+        node.nodeName === 'DIV' &&
+        highlightRegExp.test(node.className) &&
+        firstChild &&
+        firstChild.nodeName === 'PRE'
+      )
+    },
+    replacement: function (content, node, options) {
+      var className = node.className || '';
+      var language = (className.match(highlightRegExp) || [null, ''])[1];
+
+      return (
+        '\n\n' + options.fence + language + '\n' +
+        node.firstChild.textContent +
+        '\n' + options.fence + '\n\n'
+      )
+    }
+  });
+}
+
+function strikethrough (turndownService) {
+  turndownService.addRule('strikethrough', {
+    filter: ['del', 's', 'strike'],
+    replacement: function (content) {
+      return '~' + content + '~'
+    }
+  });
+}
+
+var indexOf = Array.prototype.indexOf;
+var every = Array.prototype.every;
+var rules = {};
+
+rules.tableCell = {
+  filter: ['th', 'td'],
+  replacement: function (content, node) {
+    return cell(content, node)
+  }
+};
+
+rules.tableRow = {
+  filter: 'tr',
+  replacement: function (content, node) {
+    var borderCells = '';
+    var alignMap = { left: ':--', right: '--:', center: ':-:' };
+
+    if (isHeadingRow(node)) {
+      for (var i = 0; i < node.childNodes.length; i++) {
+        var border = '---';
+        var align = (
+          node.childNodes[i].getAttribute('align') || ''
+        ).toLowerCase();
+
+        if (align) border = alignMap[align] || border;
+
+        borderCells += cell(border, node.childNodes[i]);
+      }
+    }
+    return '\n' + content + (borderCells ? '\n' + borderCells : '')
+  }
+};
+
+rules.table = {
+  // Only convert tables with a heading row.
+  // Tables with no heading row are kept using `keep` (see below).
+  filter: function (node) {
+    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0])
+  },
+
+  replacement: function (content) {
+    // Ensure there are no blank lines
+    content = content.replace('\n\n', '\n');
+    return '\n\n' + content + '\n\n'
+  }
+};
+
+rules.tableSection = {
+  filter: ['thead', 'tbody', 'tfoot'],
+  replacement: function (content) {
+    return content
+  }
+};
+
+// A tr is a heading row if:
+// - the parent is a THEAD
+// - or if its the first child of the TABLE or the first TBODY (possibly
+//   following a blank THEAD)
+// - and every cell is a TH
+function isHeadingRow (tr) {
+  var parentNode = tr.parentNode;
+  return (
+    parentNode.nodeName === 'THEAD' ||
+    (
+      parentNode.firstChild === tr &&
+      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
+      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
+    )
+  )
+}
+
+function isFirstTbody (element) {
+  var previousSibling = element.previousSibling;
+  return (
+    element.nodeName === 'TBODY' && (
+      !previousSibling ||
+      (
+        previousSibling.nodeName === 'THEAD' &&
+        /^\s*$/i.test(previousSibling.textContent)
+      )
+    )
+  )
+}
+
+function cell (content, node) {
+  var index = indexOf.call(node.parentNode.childNodes, node);
+  var prefix = ' ';
+  if (index === 0) prefix = '| ';
+  return prefix + content + ' |'
+}
+
+function tables (turndownService) {
+  turndownService.keep(function (node) {
+    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0])
+  });
+  for (var key in rules) turndownService.addRule(key, rules[key]);
+}
+
+function taskListItems (turndownService) {
+  turndownService.addRule('taskListItems', {
+    filter: function (node) {
+      return node.type === 'checkbox' && node.parentNode.nodeName === 'LI'
+    },
+    replacement: function (content, node) {
+      return (node.checked ? '[x]' : '[ ]') + ' '
+    }
+  });
+}
+
+function gfm (turndownService) {
+  turndownService.use([
+    highlightedCodeBlock,
+    strikethrough,
+    tables,
+    taskListItems
+  ]);
+}
+
+exports.gfm = gfm;
+exports.highlightedCodeBlock = highlightedCodeBlock;
+exports.strikethrough = strikethrough;
+exports.tables = tables;
+exports.taskListItems = taskListItems;
+
+return exports;
+
+}({}));

+ 932 - 0
src/utils/turndown/turndown.js

@@ -0,0 +1,932 @@
+var TurndownService = (function () {
+'use strict';
+
+function extend (destination) {
+  for (var i = 1; i < arguments.length; i++) {
+    var source = arguments[i];
+    for (var key in source) {
+      if (source.hasOwnProperty(key)) destination[key] = source[key];
+    }
+  }
+  return destination
+}
+
+function repeat (character, count) {
+  return Array(count + 1).join(character)
+}
+
+var blockElements = [
+  'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
+  'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
+  'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+  'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
+  'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
+  'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
+];
+
+function isBlock (node) {
+  return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
+}
+
+var voidElements = [
+  'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
+  'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
+];
+
+function isVoid (node) {
+  return voidElements.indexOf(node.nodeName.toLowerCase()) !== -1
+}
+
+var voidSelector = voidElements.join();
+function hasVoid (node) {
+  return node.querySelector && node.querySelector(voidSelector)
+}
+
+var rules = {};
+
+rules.paragraph = {
+  filter: 'p',
+
+  replacement: function (content) {
+    return '\n\n' + content + '\n\n'
+  }
+};
+
+rules.lineBreak = {
+  filter: 'br',
+
+  replacement: function (content, node, options) {
+    return options.br + '\n'
+  }
+};
+
+rules.heading = {
+  filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
+
+  replacement: function (content, node, options) {
+    var hLevel = Number(node.nodeName.charAt(1));
+
+    if (options.headingStyle === 'setext' && hLevel < 3) {
+      var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
+      return (
+        '\n\n' + content + '\n' + underline + '\n\n'
+      )
+    } else {
+      return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
+    }
+  }
+};
+
+rules.blockquote = {
+  filter: 'blockquote',
+
+  replacement: function (content) {
+    content = content.replace(/^\n+|\n+$/g, '');
+    content = content.replace(/^/gm, '> ');
+    return '\n\n' + content + '\n\n'
+  }
+};
+
+rules.list = {
+  filter: ['ul', 'ol'],
+
+  replacement: function (content, node) {
+    var parent = node.parentNode;
+    if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
+      return '\n' + content
+    } else {
+      return '\n\n' + content + '\n\n'
+    }
+  }
+};
+
+rules.listItem = {
+  filter: 'li',
+
+  replacement: function (content, node, options) {
+    content = content
+      .replace(/^\n+/, '') // remove leading newlines
+      .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
+      .replace(/\n/gm, '\n    '); // indent
+    var prefix = options.bulletListMarker + '   ';
+    var parent = node.parentNode;
+    if (parent.nodeName === 'OL') {
+      var start = parent.getAttribute('start');
+      var index = Array.prototype.indexOf.call(parent.children, node);
+      prefix = (start ? Number(start) + index : index + 1) + '.  ';
+    }
+    return (
+      prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
+    )
+  }
+};
+
+rules.indentedCodeBlock = {
+  filter: function (node, options) {
+    return (
+      options.codeBlockStyle === 'indented' &&
+      node.nodeName === 'PRE' &&
+      node.firstChild &&
+      node.firstChild.nodeName === 'CODE'
+    )
+  },
+
+  replacement: function (content, node, options) {
+    return (
+      '\n\n    ' +
+      node.firstChild.textContent.replace(/\n/g, '\n    ') +
+      '\n\n'
+    )
+  }
+};
+
+rules.fencedCodeBlock = {
+  filter: function (node, options) {
+    return (
+      options.codeBlockStyle === 'fenced' &&
+      node.nodeName === 'PRE' &&
+      node.firstChild &&
+      node.firstChild.nodeName === 'CODE'
+    )
+  },
+
+  replacement: function (content, node, options) {
+    var className = node.firstChild.className || '';
+    var language = (className.match(/language-(\S+)/) || [null, ''])[1];
+
+    return (
+      '\n\n' + options.fence + language + '\n' +
+      node.firstChild.textContent +
+      '\n' + options.fence + '\n\n'
+    )
+  }
+};
+
+rules.horizontalRule = {
+  filter: 'hr',
+
+  replacement: function (content, node, options) {
+    return '\n\n' + options.hr + '\n\n'
+  }
+};
+
+rules.inlineLink = {
+  filter: function (node, options) {
+    return (
+      options.linkStyle === 'inlined' &&
+      node.nodeName === 'A' &&
+      node.getAttribute('href')
+    )
+  },
+
+  replacement: function (content, node) {
+    var href = node.getAttribute('href');
+    var title = node.title ? ' "' + node.title + '"' : '';
+    return '[' + content + '](' + href + title + ')'
+  }
+};
+
+rules.referenceLink = {
+  filter: function (node, options) {
+    return (
+      options.linkStyle === 'referenced' &&
+      node.nodeName === 'A' &&
+      node.getAttribute('href')
+    )
+  },
+
+  replacement: function (content, node, options) {
+    var href = node.getAttribute('href');
+    var title = node.title ? ' "' + node.title + '"' : '';
+    var replacement;
+    var reference;
+
+    switch (options.linkReferenceStyle) {
+      case 'collapsed':
+        replacement = '[' + content + '][]';
+        reference = '[' + content + ']: ' + href + title;
+        break
+      case 'shortcut':
+        replacement = '[' + content + ']';
+        reference = '[' + content + ']: ' + href + title;
+        break
+      default:
+        var id = this.references.length + 1;
+        replacement = '[' + content + '][' + id + ']';
+        reference = '[' + id + ']: ' + href + title;
+    }
+
+    this.references.push(reference);
+    return replacement
+  },
+
+  references: [],
+
+  append: function (options) {
+    var references = '';
+    if (this.references.length) {
+      references = '\n\n' + this.references.join('\n') + '\n\n';
+      this.references = []; // Reset references
+    }
+    return references
+  }
+};
+
+rules.emphasis = {
+  filter: ['em', 'i'],
+
+  replacement: function (content, node, options) {
+    if (!content.trim()) return ''
+    return options.emDelimiter + content + options.emDelimiter
+  }
+};
+
+rules.strong = {
+  filter: ['strong', 'b'],
+
+  replacement: function (content, node, options) {
+    if (!content.trim()) return ''
+    return options.strongDelimiter + content + options.strongDelimiter
+  }
+};
+
+rules.code = {
+  filter: function (node) {
+    var hasSiblings = node.previousSibling || node.nextSibling;
+    var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
+
+    return node.nodeName === 'CODE' && !isCodeBlock
+  },
+
+  replacement: function (content) {
+    if (!content.trim()) return ''
+
+    var delimiter = '`';
+    var leadingSpace = '';
+    var trailingSpace = '';
+    var matches = content.match(/`+/gm);
+    if (matches) {
+      if (/^`/.test(content)) leadingSpace = ' ';
+      if (/`$/.test(content)) trailingSpace = ' ';
+      while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
+    }
+
+    return delimiter + leadingSpace + content + trailingSpace + delimiter
+  }
+};
+
+rules.image = {
+  filter: 'img',
+
+  replacement: function (content, node) {
+    var alt = node.alt || '';
+    var src = node.getAttribute('src') || '';
+    var title = node.title || '';
+    var titlePart = title ? ' "' + title + '"' : '';
+    return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
+  }
+};
+
+/**
+ * Manages a collection of rules used to convert HTML to Markdown
+ */
+
+function Rules (options) {
+  this.options = options;
+  this._keep = [];
+  this._remove = [];
+
+  this.blankRule = {
+    replacement: options.blankReplacement
+  };
+
+  this.keepReplacement = options.keepReplacement;
+
+  this.defaultRule = {
+    replacement: options.defaultReplacement
+  };
+
+  this.array = [];
+  for (var key in options.rules) this.array.push(options.rules[key]);
+}
+
+Rules.prototype = {
+  add: function (key, rule) {
+    this.array.unshift(rule);
+  },
+
+  keep: function (filter) {
+    this._keep.unshift({
+      filter: filter,
+      replacement: this.keepReplacement
+    });
+  },
+
+  remove: function (filter) {
+    this._remove.unshift({
+      filter: filter,
+      replacement: function () {
+        return ''
+      }
+    });
+  },
+
+  forNode: function (node) {
+    if (node.isBlank) return this.blankRule
+    var rule;
+
+    if ((rule = findRule(this.array, node, this.options))) return rule
+    if ((rule = findRule(this._keep, node, this.options))) return rule
+    if ((rule = findRule(this._remove, node, this.options))) return rule
+
+    return this.defaultRule
+  },
+
+  forEach: function (fn) {
+    for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
+  }
+};
+
+function findRule (rules, node, options) {
+  for (var i = 0; i < rules.length; i++) {
+    var rule = rules[i];
+    if (filterValue(rule, node, options)) return rule
+  }
+  return void 0
+}
+
+function filterValue (rule, node, options) {
+  var filter = rule.filter;
+  if (typeof filter === 'string') {
+    if (filter === node.nodeName.toLowerCase()) return true
+  } else if (Array.isArray(filter)) {
+    if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
+  } else if (typeof filter === 'function') {
+    if (filter.call(rule, node, options)) return true
+  } else {
+    throw new TypeError('`filter` needs to be a string, array, or function')
+  }
+}
+
+/**
+ * The collapseWhitespace function is adapted from collapse-whitespace
+ * by Luc Thevenard.
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2014 Luc Thevenard <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * collapseWhitespace(options) removes extraneous whitespace from the given element.
+ *
+ * @param {Object} options
+ */
+function collapseWhitespace (options) {
+  var element = options.element;
+  var isBlock = options.isBlock;
+  var isVoid = options.isVoid;
+  var isPre = options.isPre || function (node) {
+    return node.nodeName === 'PRE'
+  };
+
+  if (!element.firstChild || isPre(element)) return
+
+  var prevText = null;
+  var prevVoid = false;
+
+  var prev = null;
+  var node = next(prev, element, isPre);
+
+  while (node !== element) {
+    if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
+      var text = node.data.replace(/[ \r\n\t]+/g, ' ');
+
+      if ((!prevText || / $/.test(prevText.data)) &&
+          !prevVoid && text[0] === ' ') {
+        text = text.substr(1);
+      }
+
+      // `text` might be empty at this point.
+      if (!text) {
+        node = remove(node);
+        continue
+      }
+
+      node.data = text;
+
+      prevText = node;
+    } else if (node.nodeType === 1) { // Node.ELEMENT_NODE
+      if (isBlock(node) || node.nodeName === 'BR') {
+        if (prevText) {
+          prevText.data = prevText.data.replace(/ $/, '');
+        }
+
+        prevText = null;
+        prevVoid = false;
+      } else if (isVoid(node)) {
+        // Avoid trimming space around non-block, non-BR void elements.
+        prevText = null;
+        prevVoid = true;
+      }
+    } else {
+      node = remove(node);
+      continue
+    }
+
+    var nextNode = next(prev, node, isPre);
+    prev = node;
+    node = nextNode;
+  }
+
+  if (prevText) {
+    prevText.data = prevText.data.replace(/ $/, '');
+    if (!prevText.data) {
+      remove(prevText);
+    }
+  }
+}
+
+/**
+ * remove(node) removes the given node from the DOM and returns the
+ * next node in the sequence.
+ *
+ * @param {Node} node
+ * @return {Node} node
+ */
+function remove (node) {
+  var next = node.nextSibling || node.parentNode;
+
+  node.parentNode.removeChild(node);
+
+  return next
+}
+
+/**
+ * next(prev, current, isPre) returns the next node in the sequence, given the
+ * current and previous nodes.
+ *
+ * @param {Node} prev
+ * @param {Node} current
+ * @param {Function} isPre
+ * @return {Node}
+ */
+function next (prev, current, isPre) {
+  if ((prev && prev.parentNode === current) || isPre(current)) {
+    return current.nextSibling || current.parentNode
+  }
+
+  return current.firstChild || current.nextSibling || current.parentNode
+}
+
+/*
+ * Set up window for Node.js
+ */
+
+var root = (typeof window !== 'undefined' ? window : {});
+
+/*
+ * Parsing HTML strings
+ */
+
+function canParseHTMLNatively () {
+  var Parser = root.DOMParser;
+  var canParse = false;
+
+  // Adapted from https://gist.github.com/1129031
+  // Firefox/Opera/IE throw errors on unsupported types
+  try {
+    // WebKit returns null on unsupported types
+    if (new Parser().parseFromString('', 'text/html')) {
+      canParse = true;
+    }
+  } catch (e) {}
+
+  return canParse
+}
+
+function createHTMLParser () {
+  var Parser = function () {};
+
+  {
+    if (shouldUseActiveX()) {
+      Parser.prototype.parseFromString = function (string) {
+        var doc = new window.ActiveXObject('htmlfile');
+        doc.designMode = 'on'; // disable on-page scripts
+        doc.open();
+        doc.write(string);
+        doc.close();
+        return doc
+      };
+    } else {
+      Parser.prototype.parseFromString = function (string) {
+        var doc = document.implementation.createHTMLDocument('');
+        doc.open();
+        doc.write(string);
+        doc.close();
+        return doc
+      };
+    }
+  }
+  return Parser
+}
+
+function shouldUseActiveX () {
+  var useActiveX = false;
+  try {
+    document.implementation.createHTMLDocument('').open();
+  } catch (e) {
+    if (window.ActiveXObject) useActiveX = true;
+  }
+  return useActiveX
+}
+
+var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
+
+function RootNode (input) {
+  var root;
+  if (typeof input === 'string') {
+    var doc = htmlParser().parseFromString(
+      // DOM parsers arrange elements in the <head> and <body>.
+      // Wrapping in a custom element ensures elements are reliably arranged in
+      // a single element.
+      '<x-turndown id="turndown-root">' + input + '</x-turndown>',
+      'text/html'
+    );
+    root = doc.getElementById('turndown-root');
+  } else {
+    root = input.cloneNode(true);
+  }
+  collapseWhitespace({
+    element: root,
+    isBlock: isBlock,
+    isVoid: isVoid
+  });
+
+  return root
+}
+
+var _htmlParser;
+function htmlParser () {
+  _htmlParser = _htmlParser || new HTMLParser();
+  return _htmlParser
+}
+
+function Node (node) {
+  node.isBlock = isBlock(node);
+  node.isCode = node.nodeName.toLowerCase() === 'code' || node.parentNode.isCode;
+  node.isBlank = isBlank(node);
+  node.flankingWhitespace = flankingWhitespace(node);
+  return node
+}
+
+function isBlank (node) {
+  return (
+    ['A', 'TH', 'TD'].indexOf(node.nodeName) === -1 &&
+    /^\s*$/i.test(node.textContent) &&
+    !isVoid(node) &&
+    !hasVoid(node)
+  )
+}
+
+function flankingWhitespace (node) {
+  var leading = '';
+  var trailing = '';
+
+  if (!node.isBlock) {
+    var hasLeading = /^[ \r\n\t]/.test(node.textContent);
+    var hasTrailing = /[ \r\n\t]$/.test(node.textContent);
+
+    if (hasLeading && !isFlankedByWhitespace('left', node)) {
+      leading = ' ';
+    }
+    if (hasTrailing && !isFlankedByWhitespace('right', node)) {
+      trailing = ' ';
+    }
+  }
+
+  return { leading: leading, trailing: trailing }
+}
+
+function isFlankedByWhitespace (side, node) {
+  var sibling;
+  var regExp;
+  var isFlanked;
+
+  if (side === 'left') {
+    sibling = node.previousSibling;
+    regExp = / $/;
+  } else {
+    sibling = node.nextSibling;
+    regExp = /^ /;
+  }
+
+  if (sibling) {
+    if (sibling.nodeType === 3) {
+      isFlanked = regExp.test(sibling.nodeValue);
+    } else if (sibling.nodeType === 1 && !isBlock(sibling)) {
+      isFlanked = regExp.test(sibling.textContent);
+    }
+  }
+  return isFlanked
+}
+
+var reduce = Array.prototype.reduce;
+var leadingNewLinesRegExp = /^\n*/;
+var trailingNewLinesRegExp = /\n*$/;
+
+function TurndownService (options) {
+  if (!(this instanceof TurndownService)) return new TurndownService(options)
+
+  var defaults = {
+    rules: rules,
+    headingStyle: 'setext',
+    hr: '* * *',
+    bulletListMarker: '*',
+    codeBlockStyle: 'indented',
+    fence: '```',
+    emDelimiter: '_',
+    strongDelimiter: '**',
+    linkStyle: 'inlined',
+    linkReferenceStyle: 'full',
+    br: '  ',
+    blankReplacement: function (content, node) {
+      return node.isBlock ? '\n\n' : ''
+    },
+    keepReplacement: function (content, node) {
+      return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
+    },
+    defaultReplacement: function (content, node) {
+      return node.isBlock ? '\n\n' + content + '\n\n' : content
+    }
+  };
+  this.options = extend({}, defaults, options);
+  this.rules = new Rules(this.options);
+}
+
+TurndownService.prototype = {
+  /**
+   * The entry point for converting a string or DOM node to Markdown
+   * @public
+   * @param {String|HTMLElement} input The string or DOM node to convert
+   * @returns A Markdown representation of the input
+   * @type String
+   */
+
+  turndown: function (input) {
+    if (!canConvert(input)) {
+      throw new TypeError(
+        input + ' is not a string, or an element/document/fragment node.'
+      )
+    }
+
+    if (input === '') return ''
+
+    var output = process.call(this, new RootNode(input));
+    return postProcess.call(this, output)
+  },
+
+  /**
+   * Add one or more plugins
+   * @public
+   * @param {Function|Array} plugin The plugin or array of plugins to add
+   * @returns The Turndown instance for chaining
+   * @type Object
+   */
+
+  use: function (plugin) {
+    if (Array.isArray(plugin)) {
+      for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
+    } else if (typeof plugin === 'function') {
+      plugin(this);
+    } else {
+      throw new TypeError('plugin must be a Function or an Array of Functions')
+    }
+    return this
+  },
+
+  /**
+   * Adds a rule
+   * @public
+   * @param {String} key The unique key of the rule
+   * @param {Object} rule The rule
+   * @returns The Turndown instance for chaining
+   * @type Object
+   */
+
+  addRule: function (key, rule) {
+    this.rules.add(key, rule);
+    return this
+  },
+
+  /**
+   * Keep a node (as HTML) that matches the filter
+   * @public
+   * @param {String|Array|Function} filter The filter matching the nodes to keep
+   * @returns The Turndown instance for chaining
+   * @type Object
+   */
+
+  keep: function (filter) {
+    this.rules.keep(filter);
+    return this
+  },
+
+  /**
+   * Remove a node that matches the filter
+   * @public
+   * @param {String|Array|Function} filter The filter matching the nodes to remove
+   * @returns The Turndown instance for chaining
+   * @type Object
+   */
+
+  remove: function (filter) {
+    this.rules.remove(filter);
+    return this
+  },
+
+  /**
+   * Escapes Markdown syntax
+   * @public
+   * @param {String} string The string to escape
+   * @returns A string with Markdown syntax escaped
+   * @type String
+   */
+
+  escape: function (string) {
+    return (
+      string
+        // Escape backslash escapes!
+        .replace(/\\(\S)/g, '\\\\$1')
+
+        // Escape headings
+        .replace(/^(#{1,6} )/gm, '\\$1')
+
+        // Escape hr
+        .replace(/^([-*_] *){3,}$/gm, function (match, character) {
+          return match.split(character).join('\\' + character)
+        })
+
+        // Escape ol bullet points
+        .replace(/^(\W* {0,3})(\d+)\. /gm, '$1$2\\. ')
+
+        // Escape ul bullet points
+        .replace(/^([^\\\w]*)[*+-] /gm, function (match) {
+          return match.replace(/([*+-])/g, '\\$1')
+        })
+
+        // Escape blockquote indents
+        .replace(/^(\W* {0,3})> /gm, '$1\\> ')
+
+        // Escape em/strong *
+        .replace(/\*+(?![*\s\W]).+?\*+/g, function (match) {
+          return match.replace(/\*/g, '\\*')
+        })
+
+        // Escape em/strong _
+        .replace(/_+(?![_\s\W]).+?_+/g, function (match) {
+          return match.replace(/_/g, '\\_')
+        })
+
+        // Escape code `
+        .replace(/`+(?![`\s\W]).+?`+/g, function (match) {
+          return match.replace(/`/g, '\\`')
+        })
+
+        // Escape link brackets
+        .replace(/[\[\]]/g, '\\$&') // eslint-disable-line no-useless-escape
+    )
+  }
+};
+
+/**
+ * Reduces a DOM node down to its Markdown string equivalent
+ * @private
+ * @param {HTMLElement} parentNode The node to convert
+ * @returns A Markdown representation of the node
+ * @type String
+ */
+
+function process (parentNode) {
+  var self = this;
+  return reduce.call(parentNode.childNodes, function (output, node) {
+    node = new Node(node);
+
+    var replacement = '';
+    if (node.nodeType === 3) {
+      replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
+    } else if (node.nodeType === 1) {
+      replacement = replacementForNode.call(self, node);
+    }
+
+    return join(output, replacement)
+  }, '')
+}
+
+/**
+ * Appends strings as each rule requires and trims the output
+ * @private
+ * @param {String} output The conversion output
+ * @returns A trimmed version of the output
+ * @type String
+ */
+
+function postProcess (output) {
+  var self = this;
+  this.rules.forEach(function (rule) {
+    if (typeof rule.append === 'function') {
+      output = join(output, rule.append(self.options));
+    }
+  });
+
+  return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
+}
+
+/**
+ * Converts an element node to its Markdown equivalent
+ * @private
+ * @param {HTMLElement} node The node to convert
+ * @returns A Markdown representation of the node
+ * @type String
+ */
+
+function replacementForNode (node) {
+  var rule = this.rules.forNode(node);
+  var content = process.call(this, node);
+  var whitespace = node.flankingWhitespace;
+  if (whitespace.leading || whitespace.trailing) content = content.trim();
+  return (
+    whitespace.leading +
+    rule.replacement(content, node, this.options) +
+    whitespace.trailing
+  )
+}
+
+/**
+ * Determines the new lines between the current output and the replacement
+ * @private
+ * @param {String} output The current conversion output
+ * @param {String} replacement The string to append to the output
+ * @returns The whitespace to separate the current output and the replacement
+ * @type String
+ */
+
+function separatingNewlines (output, replacement) {
+  var newlines = [
+    output.match(trailingNewLinesRegExp)[0],
+    replacement.match(leadingNewLinesRegExp)[0]
+  ].sort();
+  var maxNewlines = newlines[newlines.length - 1];
+  return maxNewlines.length < 2 ? maxNewlines : '\n\n'
+}
+
+function join (string1, string2) {
+  var separator = separatingNewlines(string1, string2);
+
+  // Remove trailing/leading newlines and replace with separator
+  string1 = string1.replace(trailingNewLinesRegExp, '');
+  string2 = string2.replace(leadingNewLinesRegExp, '');
+
+  return string1 + separator + string2
+}
+
+/**
+ * Determines whether an input can be converted
+ * @private
+ * @param {String|HTMLElement} input The value to check
+ * @returns A truthy value if the input is a string or an element, document or document fragment node
+ * @type String|Object|Array|Boolean|Number
+ */
+
+function canConvert (input) {
+  return (
+    input != null && (
+      typeof input === 'string' ||
+      (input.nodeType && (
+        input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
+      ))
+    )
+  )
+}
+
+return TurndownService;
+
+}());

+ 3 - 0
src/utils/vutils.cpp

@@ -735,6 +735,9 @@ QString VUtils::generateHtmlTemplate(const QString &p_template,
         Q_ASSERT(false);
     }
 
+    extraFile += "<script src=\"qrc" + VNote::c_turndownJsFile + "\"></script>\n";
+    extraFile += "<script src=\"qrc" + VNote::c_turndownGfmExtraFile + "\"></script>\n";
+
     if (g_config->getEnableMermaid()) {
         extraFile += "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + g_config->getMermaidCssStyleUrl() + "\"/>\n" +
                      "<script src=\"qrc" + VNote::c_mermaidApiJsFile + "\"></script>\n" +

+ 13 - 0
src/vdocument.cpp

@@ -94,6 +94,14 @@ void VDocument::textToHtmlAsync(int p_identitifer,
     emit requestTextToHtml(p_identitifer, p_id, p_timeStamp, p_text, p_inlineStyle);
 }
 
+// Ask the web side to convert @p_html to Markdown text by emitting
+// requestHtmlToText with the arguments passed through unchanged.
+void VDocument::htmlToTextAsync(int p_identitifer,
+                                int p_id,
+                                int p_timeStamp,
+                                const QString &p_html)
+{
+    emit requestHtmlToText(p_identitifer, p_id, p_timeStamp, p_html);
+}
+
 void VDocument::getHtmlContentAsync()
 {
     emit requestHtmlContent();
@@ -104,6 +112,11 @@ void VDocument::textToHtmlCB(int p_identitifer, int p_id, int p_timeStamp, const
     emit textToHtmlFinished(p_identitifer, p_id, p_timeStamp, p_html);
 }
 
+// Callback slot for a finished HTML-to-text conversion (presumably invoked
+// from the web side); re-emits the result as htmlToTextFinished.
+void VDocument::htmlToTextCB(int p_identitifer, int p_id, int p_timeStamp, const QString &p_text)
+{
+    emit htmlToTextFinished(p_identitifer, p_id, p_timeStamp, p_text);
+}
+
 void VDocument::noticeReadyToHighlightText()
 {
     m_readyToHighlight = true;

+ 15 - 0
src/vdocument.h

@@ -40,6 +40,12 @@ public:
                          const QString &p_text,
                          bool p_inlineStyle);
 
+    // Request to convert @p_html to Markdown text.
+    void htmlToTextAsync(int p_identitifer,
+                         int p_id,
+                         int p_timeStamp,
+                         const QString &p_html);
+
     void setFile(const VFile *p_file);
 
     bool isReadyToHighlight() const;
@@ -92,6 +98,8 @@ public slots:
 
     void textToHtmlCB(int p_identitifer, int p_id, int p_timeStamp, const QString &p_html);
 
+    void htmlToTextCB(int p_identitifer, int p_id, int p_timeStamp, const QString &p_text);
+
     void noticeReadyToTextToHtml();
 
     // Web-side handle logics (MathJax etc.) is finished.
@@ -144,8 +152,15 @@ signals:
                            const QString &p_text,
                            bool p_inlineStyle);
 
+    void requestHtmlToText(int p_identitifer,
+                           int p_id,
+                           int p_timeStamp,
+                           const QString &p_html);
+
     void textToHtmlFinished(int p_identitifer, int p_id, int p_timeStamp, const QString &p_html);
 
+    void htmlToTextFinished(int p_identitifer, int p_id, int p_timeStamp, const QString &p_text);
+
     void requestHtmlContent();
 
     void htmlContentFinished(const QString &p_headContent,

+ 41 - 0
src/vmdeditor.cpp

@@ -815,6 +815,36 @@ void VMdEditor::insertFromMimeData(const QMimeData *p_source)
             m_editOps->insertImageFromURL(QUrl(reg.cap(2)));
             return;
         }
+
+        // Handle HTML.
+        VSelectDialog dialog(tr("Insert From Clipboard"), this);
+        dialog.addSelection(tr("Insert Converted Markdown Text"), 0);
+        dialog.addSelection(tr("Insert As Text"), 1);
+        if (p_source->hasImage()) {
+            dialog.addSelection(tr("Insert As Image"), 2);
+        }
+
+        if (dialog.exec() == QDialog::Accepted) {
+            switch (dialog.getSelection()) {
+            case 0:
+                ++m_copyTimeStamp;
+                emit requestHtmlToText(html, 0, m_copyTimeStamp);
+                break;
+
+            case 1:
+                VTextEdit::insertFromMimeData(p_source);
+                break;
+
+            case 2:
+                m_editOps->insertImageFromMimeData(p_source);
+                break;
+
+            default:
+                break;
+            }
+        }
+
+        return;
     }
 
     VSelectDialog dialog(tr("Insert From Clipboard"), this);
@@ -1196,6 +1226,17 @@ void VMdEditor::textToHtmlFinished(int p_id,
     }
 }
 
+// Insert the converted Markdown @p_text at the current cursor position.
+// The result is ignored when it is empty or when @p_timeStamp does not match
+// the current m_copyTimeStamp. @p_id is unused.
+void VMdEditor::htmlToTextFinished(int p_id, int p_timeStamp, const QString &p_text)
+{
+    Q_UNUSED(p_id);
+    if (m_copyTimeStamp == p_timeStamp && !p_text.isEmpty()) {
+        QTextCursor cursor = textCursor();
+        cursor.insertText(p_text);
+        setTextCursor(cursor);
+        // Status message fixed: the text is "inserted", not "inverted".
+        emit m_object->statusMessage(tr("Converted Markdown text inserted"));
+    }
+}
+
 void VMdEditor::wheelEvent(QWheelEvent *p_event)
 {
     if (handleWheelEvent(p_event)) {

+ 5 - 0
src/vmdeditor.h

@@ -84,6 +84,8 @@ public slots:
 
     void textToHtmlFinished(int p_id, int p_timeStamp, const QUrl &p_baseUrl, const QString &p_html);
 
+    // HTML-to-Markdown conversion finished; @p_text is the converted Markdown text.
+    void htmlToTextFinished(int p_id, int p_timeStamp, const QString &p_text);
+
 // Wrapper functions for QPlainTextEdit/QTextEdit.
 public:
     void setExtraSelectionsW(const QList<QTextEdit::ExtraSelection> &p_selections) Q_DECL_OVERRIDE
@@ -214,6 +216,9 @@ signals:
     // Request to convert @p_text to Html.
     void requestTextToHtml(const QString &p_text, int p_id, int p_timeStamp);
 
+    // Request to convert @p_html to Markdown text.
+    void requestHtmlToText(const QString &p_html, int p_id, int p_timeStamp);
+
 protected:
     void updateFontAndPalette() Q_DECL_OVERRIDE;
 

+ 28 - 0
src/vmdtab.cpp

@@ -451,6 +451,15 @@ void VMdTab::setupMarkdownViewer()
 
                 m_editor->textToHtmlFinished(p_id, p_timeStamp, m_webViewer->url(), p_html);
             });
+    connect(m_document, &VDocument::htmlToTextFinished,
+            this, [this](int p_identitifer, int p_id, int p_timeStamp, const QString &p_text) {
+                Q_ASSERT(m_editor);
+                if (m_documentID != p_identitifer) {
+                    return;
+                }
+
+                m_editor->htmlToTextFinished(p_id, p_timeStamp, p_text);
+            });
     connect(m_document, &VDocument::wordCountInfoUpdated,
             this, [this]() {
                 VEditTabInfo info = fetchTabInfo(VEditTabInfo::InfoType::All);
@@ -525,6 +534,8 @@ void VMdTab::setupMarkdownEditor()
             });
     connect(m_editor, &VMdEditor::requestTextToHtml,
             this, &VMdTab::textToHtmlViaWebView);
+    connect(m_editor, &VMdEditor::requestHtmlToText,
+            this, &VMdTab::htmlToTextViaWebView);
 
     if (m_editor->getVim()) {
         connect(m_editor->getVim(), &VVim::commandLineTriggered,
@@ -1217,6 +1228,23 @@ void VMdTab::textToHtmlViaWebView(const QString &p_text, int p_id, int p_timeSta
     m_document->textToHtmlAsync(m_documentID, p_id, p_timeStamp, p_text, true);
 }
 
+// Forward @p_html to the document for conversion to Markdown text.
+// Polls the document's readiness up to 50 times, calling
+// VUtils::sleepWait(100) between checks, and gives up with a warning if the
+// web side never becomes ready.
+void VMdTab::htmlToTextViaWebView(const QString &p_html, int p_id, int p_timeStamp)
+{
+    int maxRetry = 50;
+    while (!m_document->isReadyToTextToHtml() && maxRetry > 0) {
+        qDebug() << "wait for web side ready to convert HTML to text";
+        VUtils::sleepWait(100);
+        --maxRetry;
+    }
+
+    // Test the readiness flag itself rather than the retry counter: the web
+    // side may become ready during the very last wait, which leaves
+    // maxRetry == 0 even though the request can be served.
+    if (!m_document->isReadyToTextToHtml()) {
+        qWarning() << "web side is not ready to convert HTML to text";
+        return;
+    }
+
+    m_document->htmlToTextAsync(m_documentID, p_id, p_timeStamp, p_html);
+}
+
 void VMdTab::handleVimCmdCommandCancelled()
 {
     if (m_isEditMode) {

+ 2 - 0
src/vmdtab.h

@@ -221,6 +221,8 @@ private:
 
     void textToHtmlViaWebView(const QString &p_text, int p_id, int p_timeStamp);
 
+    void htmlToTextViaWebView(const QString &p_html, int p_id, int p_timeStamp);
+
     bool executeVimCommandInWebView(const QString &p_cmd);
 
     // Update web view by current content.

+ 3 - 0
src/vnote.cpp

@@ -48,6 +48,9 @@ const QString VNote::c_showdownJsFile = ":/resources/showdown.js";
 const QString VNote::c_showdownExtraFile = ":/utils/showdown/showdown.min.js";
 const QString VNote::c_showdownAnchorExtraFile = ":/utils/showdown/showdown-headinganchor.js";
 
+const QString VNote::c_turndownJsFile = ":/utils/turndown/turndown.js";
+const QString VNote::c_turndownGfmExtraFile = ":/utils/turndown/turndown-plugin-gfm.js";
+
 const QString VNote::c_mermaidApiJsFile = ":/utils/mermaid/mermaidAPI.min.js";
 const QString VNote::c_mermaidForestCssFile = ":/utils/mermaid/mermaid.forest.css";
 

+ 4 - 0
src/vnote.h

@@ -60,6 +60,10 @@ public:
     static const QString c_showdownExtraFile;
     static const QString c_showdownAnchorExtraFile;
 
+    // Turndown
+    static const QString c_turndownJsFile;
+    static const QString c_turndownGfmExtraFile;
+
     // Mermaid
     static const QString c_mermaidApiJsFile;
     static const QString c_mermaidForestCssFile;

+ 2 - 0
src/vnote.qrc

@@ -267,5 +267,7 @@
         <file>resources/icons/increase_outline_level.svg</file>
         <file>utils/markdown-it/markdown-it-texmath.js</file>
         <file>resources/icons/up.svg</file>
+        <file>utils/turndown/turndown.js</file>
+        <file>utils/turndown/turndown-plugin-gfm.js</file>
     </qresource>
 </RCC>