turndown.js 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909
  1. var TurndownService = (function () {
  2. 'use strict';
  /**
   * Shallow-copies own enumerable properties from every source argument onto
   * `destination`. Later sources overwrite earlier ones.
   *
   * @param {Object} destination The object that receives the properties
   * @param {...Object} sources Any number of source objects (nullish ones are skipped)
   * @returns {Object} The same `destination` object
   */
  function extend (destination) {
    // Borrow the method from Object.prototype so that sources created with
    // Object.create(null), or sources that shadow `hasOwnProperty`, still work.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var i = 1; i < arguments.length; i++) {
      var source = arguments[i];
      for (var key in source) {
        if (hasOwn.call(source, key)) destination[key] = source[key];
      }
    }
    return destination
  }
  /**
   * Builds a string made of `character` repeated `count` times.
   *
   * @param {String} character The string to repeat
   * @param {Number} count How many copies to produce
   * @returns {String} The repeated string
   */
  function repeat (character, count) {
    // Joining count + 1 empty slots yields exactly `count` separators.
    var slots = new Array(count + 1);
    return slots.join(character)
  }
  15. var blockElements = [
  16. 'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas',
  17. 'center', 'dd', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
  18. 'figure', 'footer', 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  19. 'header', 'hgroup', 'hr', 'html', 'isindex', 'li', 'main', 'menu', 'nav',
  20. 'noframes', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table',
  21. 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'ul'
  22. ];
  23. function isBlock (node) {
  24. return blockElements.indexOf(node.nodeName.toLowerCase()) !== -1
  25. }
  // Lower-case tag names of elements that never have children.
  var voidElements = [
    'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img',
    'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
  ];

  /**
   * Tells whether a node's tag name is in the void element list.
   *
   * @param {Node} node The node to test
   * @returns {Boolean} true when the node is a void element
   */
  function isVoid (node) {
    var tagName = node.nodeName.toLowerCase();
    return voidElements.indexOf(tagName) >= 0
  }

  // Comma-separated selector that matches any void element.
  var voidSelector = voidElements.join(',');

  /**
   * Looks for a void element among the descendants of a node.
   *
   * @param {Node} node The node to search
   * @returns The first matching descendant, or a falsy value when the node
   * has no `querySelector` or nothing matches
   */
  function hasVoid (node) {
    var canQuery = node.querySelector;
    if (!canQuery) return canQuery
    return node.querySelector(voidSelector)
  }
  // Registry of the built-in conversion rules, keyed by rule name.
  var rules = {};

  // <p>: the content becomes its own block, padded with blank lines.
  rules.paragraph = {
    filter: 'p',

    replacement: function (content) {
      var opened = '\n\n' + content;
      return opened + '\n\n'
    }
  };
  // <br>: emits the configured line-break marker followed by a newline.
  rules.lineBreak = {
    filter: 'br',

    replacement: function (content, node, options) {
      var marker = options.br;
      return marker + '\n'
    }
  };
  // <h1>-<h6>: setext underlines for levels 1 and 2 when the 'setext' heading
  // style is selected, ATX ("#") prefixes in every other case.
  rules.heading = {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],

    replacement: function (content, node, options) {
      // The heading level is the digit in the tag name.
      var level = Number(node.nodeName.charAt(1));
      var useSetext = options.headingStyle === 'setext' && level < 3;

      if (!useSetext) {
        return '\n\n' + repeat('#', level) + ' ' + content + '\n\n'
      }

      var ruleCharacter = level === 1 ? '=' : '-';
      var underline = repeat(ruleCharacter, content.length);
      return '\n\n' + content + '\n' + underline + '\n\n'
    }
  };
  // <blockquote>: strips surrounding newlines, then prefixes every line with "> ".
  rules.blockquote = {
    filter: 'blockquote',

    replacement: function (content) {
      var trimmed = content.replace(/^\n+|\n+$/g, '');
      var quoted = trimmed.replace(/^/gm, '> ');
      return '\n\n' + quoted + '\n\n'
    }
  };
  // <ul>/<ol>: a list that is the last element child of an <li> is attached
  // with a single newline; any other list becomes a block of its own.
  rules.list = {
    filter: ['ul', 'ol'],

    replacement: function (content, node) {
      var container = node.parentNode;
      var endsListItem = container.nodeName === 'LI' && container.lastElementChild === node;
      return endsListItem ? '\n' + content : '\n\n' + content + '\n\n'
    }
  };
  // <li>: prefixes the content with a bullet, or with its ordinal when the
  // parent is an <ol> (honouring the parent's `start` attribute).
  rules.listItem = {
    filter: 'li',

    replacement: function (content, node, options) {
      var prefix = options.bulletListMarker + ' ';
      var parent = node.parentNode;
      if (parent.nodeName === 'OL') {
        var start = parent.getAttribute('start');
        var index = Array.prototype.indexOf.call(parent.children, node);
        prefix = (start ? Number(start) + index : index + 1) + '. ';
      }

      // Continuation lines must be indented by the full marker width;
      // otherwise nested lists and paragraphs are parsed as siblings of the
      // item rather than as part of it.
      var indent = new Array(prefix.length + 1).join(' ');

      content = content
        .replace(/^\n+/, '') // remove leading newlines
        .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
        .replace(/\n/gm, '\n' + indent); // indent continuation lines

      return (
        prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
      )
    }
  };
  // <pre><code>: rendered as an indented code block when `codeBlockStyle` is
  // 'indented'. Only matches a <pre> whose first child is a <code> element.
  rules.indentedCodeBlock = {
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'indented' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      )
    },

    replacement: function (content, node, options) {
      // Markdown only recognises an indented code block when every line is
      // indented by at least four spaces; a smaller indent yields a paragraph.
      var indent = '    ';
      return (
        '\n\n' + indent +
        node.firstChild.textContent.replace(/\n/g, '\n' + indent) +
        '\n\n'
      )
    }
  };
  // <pre><code>: rendered as a fenced code block when `codeBlockStyle` is
  // 'fenced'. A `language-xxx` class on the <code> element becomes the info string.
  rules.fencedCodeBlock = {
    filter: function (node, options) {
      if (options.codeBlockStyle !== 'fenced') return false
      if (node.nodeName !== 'PRE') return false
      var first = node.firstChild;
      return first && node.firstChild.nodeName === 'CODE'
    },

    replacement: function (content, node, options) {
      var codeElement = node.firstChild;
      var className = codeElement.className || '';
      var match = className.match(/language-(\S+)/);
      var language = match ? match[1] : '';
      var opening = '\n\n' + options.fence + language + '\n';
      var closing = '\n' + options.fence + '\n\n';
      return opening + node.firstChild.textContent + closing
    }
  };
  // <hr>: emits the configured thematic-break marker as its own block.
  rules.horizontalRule = {
    filter: 'hr',

    replacement: function (content, node, options) {
      var marker = options.hr;
      return '\n\n' + marker + '\n\n'
    }
  };
  // <a href>: rendered as an inline link `[text](href "title")` when
  // `linkStyle` is 'inlined'. Anchors without an href are not matched.
  rules.inlineLink = {
    filter: function (node, options) {
      return (
        options.linkStyle === 'inlined' &&
        node.nodeName === 'A' &&
        node.getAttribute('href')
      )
    },

    replacement: function (content, node) {
      var href = node.getAttribute('href');
      // The title is wrapped in double quotes, so any double quote inside it
      // must be backslash-escaped or it would end the title early.
      var title = node.title
        ? ' "' + String(node.title).replace(/"/g, '\\"') + '"'
        : '';
      return '[' + content + '](' + href + title + ')'
    }
  };
  // <a href>: rendered as a reference link when `linkStyle` is 'referenced'.
  // The link definitions are collected in `references` (stored on this rule
  // object) and emitted, then cleared, by `append`.
  rules.referenceLink = {
    filter: function (node, options) {
      return (
        options.linkStyle === 'referenced' &&
        node.nodeName === 'A' &&
        node.getAttribute('href')
      )
    },

    replacement: function (content, node, options) {
      var href = node.getAttribute('href');
      // The title is wrapped in double quotes, so any double quote inside it
      // must be backslash-escaped or it would end the title early.
      var title = node.title
        ? ' "' + String(node.title).replace(/"/g, '\\"') + '"'
        : '';
      var replacement;
      var reference;

      switch (options.linkReferenceStyle) {
        case 'collapsed':
          replacement = '[' + content + '][]';
          reference = '[' + content + ']: ' + href + title;
          break
        case 'shortcut':
          replacement = '[' + content + ']';
          reference = '[' + content + ']: ' + href + title;
          break
        default:
          // Full style: number the definitions in the order they are seen.
          var id = this.references.length + 1;
          replacement = '[' + content + '][' + id + ']';
          reference = '[' + id + ']: ' + href + title;
      }

      this.references.push(reference);
      return replacement
    },

    references: [],

    // Returns the collected definitions as a trailing block and resets the list.
    append: function (options) {
      var references = '';
      if (this.references.length) {
        references = '\n\n' + this.references.join('\n') + '\n\n';
        this.references = []; // Reset references
      }
      return references
    }
  };
  // <em>/<i>: wraps non-blank content in the configured emphasis delimiter.
  rules.emphasis = {
    filter: ['em', 'i'],

    replacement: function (content, node, options) {
      var isEmpty = !content.trim();
      if (isEmpty) return ''
      var delimiter = options.emDelimiter;
      return delimiter + content + options.emDelimiter
    }
  };
  // <strong>/<b>: wraps non-blank content in the configured strong delimiter.
  rules.strong = {
    filter: ['strong', 'b'],

    replacement: function (content, node, options) {
      var isEmpty = !content.trim();
      if (isEmpty) return ''
      var delimiter = options.strongDelimiter;
      return delimiter + content + options.strongDelimiter
    }
  };
  // <code>: inline code span. A <code> that is the only child of a <pre> is
  // left to the code-block rules.
  rules.code = {
    filter: function (node) {
      var standsAlone = !(node.previousSibling || node.nextSibling);
      var soleChildOfPre = node.parentNode.nodeName === 'PRE' && standsAlone;
      if (node.nodeName !== 'CODE') return false
      return !soleChildOfPre
    },

    replacement: function (content) {
      if (!content.trim()) return ''

      var fence = '`';
      var padStart = '';
      var padEnd = '';
      var backtickRuns = content.match(/`+/gm);

      if (backtickRuns) {
        // Pad with a space when the content itself starts or ends with a backtick.
        if (/^`/.test(content)) padStart = ' ';
        if (/`$/.test(content)) padEnd = ' ';
        // Lengthen the fence until it differs from every backtick run in the content.
        while (backtickRuns.indexOf(fence) !== -1) fence += '`';
      }

      return fence + padStart + content + padEnd + fence
    }
  };
  // <img>: rendered as `![alt](src "title")`. Images without a src produce
  // an empty string.
  rules.image = {
    filter: 'img',

    replacement: function (content, node) {
      var alt = node.alt || '';
      var src = node.getAttribute('src') || '';
      var title = node.title || '';
      // The title is wrapped in double quotes, so any double quote inside it
      // must be backslash-escaped or it would end the title early.
      var titlePart = title
        ? ' "' + String(title).replace(/"/g, '\\"') + '"'
        : '';
      return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
    }
  };
  /**
   * Holds the rules used to convert HTML to Markdown and picks the rule that
   * applies to a given node.
   *
   * @param {Object} options Conversion options; reads `rules`,
   * `blankReplacement`, `keepReplacement` and `defaultReplacement`
   */
  function Rules (options) {
    this.options = options;

    this._keep = [];
    this._remove = [];

    this.blankRule = { replacement: options.blankReplacement };
    this.keepReplacement = options.keepReplacement;
    this.defaultRule = { replacement: options.defaultReplacement };

    // Flatten the keyed rule map into a list, in enumeration order.
    this.array = [];
    for (var name in options.rules) {
      this.array.push(options.rules[name]);
    }
  }

  Rules.prototype = {
    // Adds a rule ahead of all existing ones (the key is not stored).
    add: function (key, rule) {
      this.array.unshift(rule);
    },

    // Registers a filter whose matching nodes use the keep replacement.
    keep: function (filter) {
      var rule = { filter: filter, replacement: this.keepReplacement };
      this._keep.unshift(rule);
    },

    // Registers a filter whose matching nodes are replaced by an empty string.
    remove: function (filter) {
      var rule = {
        filter: filter,
        replacement: function () {
          return ''
        }
      };
      this._remove.unshift(rule);
    },

    // Picks the rule for a node: the blank rule for blank nodes, otherwise the
    // first match among regular, keep and remove rules (in that order), and
    // the default rule when nothing matches.
    forNode: function (node) {
      if (node.isBlank) return this.blankRule

      var groups = [this.array, this._keep, this._remove];
      for (var g = 0; g < groups.length; g++) {
        var match = findRule(groups[g], node, this.options);
        if (match) return match
      }

      return this.defaultRule
    },

    // Calls fn(rule, index) for every regular rule.
    forEach: function (fn) {
      var index = 0;
      while (index < this.array.length) {
        fn(this.array[index], index);
        index += 1;
      }
    }
  };
  /**
   * Returns the first rule in `rules` whose filter matches the node.
   *
   * @param {Array} rules The candidate rules, in priority order
   * @param {Node} node The node to match
   * @param {Object} options Conversion options handed to function filters
   * @returns The matching rule, or undefined when none matches
   */
  function findRule (rules, node, options) {
    var index = 0;
    while (index < rules.length) {
      var candidate = rules[index];
      if (filterValue(candidate, node, options)) return candidate
      index += 1;
    }
    return undefined
  }
  /**
   * Evaluates a rule's filter against a node. A string filter is compared
   * with the lower-cased tag name, an array filter is searched for it, and a
   * function filter is called with the rule as `this`.
   *
   * @param {Object} rule The rule whose `filter` is evaluated
   * @param {Node} node The node to test
   * @param {Object} options Conversion options handed to function filters
   * @returns true on a match, undefined otherwise
   * @throws {TypeError} When the filter is not a string, array or function
   */
  function filterValue (rule, node, options) {
    var filter = rule.filter;
    var matched;

    if (typeof filter === 'string') {
      matched = filter === node.nodeName.toLowerCase();
    } else if (Array.isArray(filter)) {
      matched = filter.indexOf(node.nodeName.toLowerCase()) > -1;
    } else if (typeof filter === 'function') {
      matched = filter.call(rule, node, options);
    } else {
      throw new TypeError('`filter` needs to be a string, array, or function')
    }

    if (matched) return true
  }
  308. /**
  309. * The collapseWhitespace function is adapted from collapse-whitespace
  310. * by Luc Thevenard.
  311. *
  312. * The MIT License (MIT)
  313. *
  314. * Copyright (c) 2014 Luc Thevenard <[email protected]>
  315. *
  316. * Permission is hereby granted, free of charge, to any person obtaining a copy
  317. * of this software and associated documentation files (the "Software"), to deal
  318. * in the Software without restriction, including without limitation the rights
  319. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  320. * copies of the Software, and to permit persons to whom the Software is
  321. * furnished to do so, subject to the following conditions:
  322. *
  323. * The above copyright notice and this permission notice shall be included in
  324. * all copies or substantial portions of the Software.
  325. *
  326. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  327. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  328. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  329. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  330. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  331. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  332. * THE SOFTWARE.
  333. */
  /**
   * collapseWhitespace(options) removes extraneous whitespace from the given
   * element, editing its text nodes in place.
   *
   * @param {Object} options
   * @param {Node} options.element The element whose subtree is cleaned up
   * @param {Function} options.isBlock Predicate for block elements
   * @param {Function} options.isVoid Predicate for void elements
   * @param {Function} [options.isPre] Predicate for elements whose content is
   * left untouched; defaults to matching <pre>
   */
  function collapseWhitespace (options) {
    var element = options.element;
    var isBlock = options.isBlock;
    var isVoid = options.isVoid;
    var isPre = options.isPre || function (node) {
      return node.nodeName === 'PRE'
    };

    if (!element.firstChild || isPre(element)) return

    var lastText = null; // most recent text node kept on the current line
    var afterVoid = false; // true right after a non-block, non-BR void element
    var previous = null;
    var current = next(previous, element, isPre);

    // Walk the subtree in document order until the walk returns to the root.
    while (current !== element) {
      var type = current.nodeType;

      if (type === 3 || type === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
        var collapsed = current.data.replace(/[ \r\n\t]+/g, ' ');
        var atLineStart = !lastText || / $/.test(lastText.data);

        // Drop a leading space that would duplicate preceding whitespace.
        if (atLineStart && !afterVoid && collapsed[0] === ' ') {
          collapsed = collapsed.substr(1);
        }

        // Nothing left: remove the node and resume from its successor.
        if (!collapsed) {
          current = remove(current);
          continue
        }

        current.data = collapsed;
        lastText = current;
      } else if (type === 1) { // Node.ELEMENT_NODE
        if (isBlock(current) || current.nodeName === 'BR') {
          // A block or <br> ends the line: trim the trailing space before it.
          if (lastText) {
            lastText.data = lastText.data.replace(/ $/, '');
          }
          lastText = null;
          afterVoid = false;
        } else if (isVoid(current)) {
          // Avoid trimming space around non-block, non-BR void elements.
          lastText = null;
          afterVoid = true;
        }
      } else {
        // Any other node type is removed.
        current = remove(current);
        continue
      }

      var following = next(previous, current, isPre);
      previous = current;
      current = following;
    }

    // Trim the trailing space of the last text node, removing it if it ends up empty.
    if (lastText) {
      lastText.data = lastText.data.replace(/ $/, '');
      if (!lastText.data) {
        remove(lastText);
      }
    }
  }
  /**
   * remove(node) detaches the given node from its parent and returns the node
   * the walk should continue with: the next sibling if there is one, the
   * parent otherwise.
   *
   * @param {Node} node
   * @return {Node} node
   */
  function remove (node) {
    var parent = node.parentNode;
    var successor = node.nextSibling || parent;
    parent.removeChild(node);
    return successor
  }
  /**
   * next(prev, current, isPre) returns the node that follows `current` in the
   * walk. The children of `current` are skipped when the walk has just come
   * back up from one of them, or when `current` satisfies `isPre`.
   *
   * @param {Node} prev
   * @param {Node} current
   * @param {Function} isPre
   * @return {Node}
   */
  function next (prev, current, isPre) {
    var skipChildren = (prev && prev.parentNode === current) || isPre(current);
    if (!skipChildren) {
      var firstChild = current.firstChild;
      if (firstChild) return firstChild
    }
    return current.nextSibling || current.parentNode
  }
  /*
   * Global object lookup: `window` where it exists, an empty object otherwise
   * (e.g. under Node.js).
   */
  var root = typeof window === 'undefined' ? {} : window;

  /*
   * Parsing HTML strings
   */

  /**
   * Feature-detects whether `root.DOMParser` can parse 'text/html'.
   *
   * @returns {Boolean} true when a native HTML-capable DOMParser is available
   */
  function canParseHTMLNatively () {
    var Parser = root.DOMParser;

    // Adapted from https://gist.github.com/1129031
    // Firefox/Opera/IE throw errors on unsupported types
    try {
      // WebKit returns null on unsupported types
      if (new Parser().parseFromString('', 'text/html')) return true
    } catch (e) {}

    return false
  }
  /**
   * Builds a stand-in parser constructor whose `parseFromString(string)`
   * writes the markup into a fresh document and returns that document.
   * An ActiveX 'htmlfile' document is used when `shouldUseActiveX()` says so,
   * `document.implementation.createHTMLDocument` otherwise.
   *
   * @returns {Function} A constructor exposing `parseFromString`
   */
  function createHTMLParser () {
    var Parser = function () {};
    var proto = Parser.prototype;

    if (shouldUseActiveX()) {
      proto.parseFromString = function (string) {
        var doc = new window.ActiveXObject('htmlfile');
        doc.designMode = 'on'; // disable on-page scripts
        doc.open();
        doc.write(string);
        doc.close();
        return doc
      };
      return Parser
    }

    proto.parseFromString = function (string) {
      var doc = document.implementation.createHTMLDocument('');
      doc.open();
      doc.write(string);
      doc.close();
      return doc
    };
    return Parser
  }
  /**
   * Decides whether the ActiveX 'htmlfile' fallback is needed: that is the
   * case when opening a document from `createHTMLDocument` throws and
   * `window.ActiveXObject` is available.
   *
   * @returns {Boolean} true when the ActiveX-based parser should be used
   */
  function shouldUseActiveX () {
    try {
      document.implementation.createHTMLDocument('').open();
    } catch (e) {
      // Guard the `window` lookup: where neither `document` nor `window`
      // exists, an unguarded read would throw a ReferenceError from inside
      // this handler.
      return typeof window !== 'undefined' && Boolean(window.ActiveXObject)
    }
    return false
  }
  // Parser constructor: the native DOMParser when it handles HTML, otherwise
  // the document-based stand-in.
  var HTMLParser;
  if (canParseHTMLNatively()) {
    HTMLParser = root.DOMParser;
  } else {
    HTMLParser = createHTMLParser();
  }
  /**
   * Produces the root node for a conversion. A string is parsed into a
   * wrapper element; a DOM node is deep-cloned. Either way the result has its
   * whitespace collapsed before being returned.
   *
   * @param {String|Node} input The HTML string or DOM node to convert
   * @returns {Node} The prepared root node
   */
  function RootNode (input) {
    var rootElement;

    if (typeof input !== 'string') {
      rootElement = input.cloneNode(true);
    } else {
      var parser = htmlParser();
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      var markup = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
      var doc = parser.parseFromString(markup, 'text/html');
      rootElement = doc.getElementById('turndown-root');
    }

    collapseWhitespace({
      element: rootElement,
      isBlock: isBlock,
      isVoid: isVoid
    });

    return rootElement
  }
  // Cached parser instance, created on first use.
  var _htmlParser;

  /**
   * Returns the shared parser instance, creating it on the first call.
   *
   * @returns {Object} An instance of `HTMLParser`
   */
  function htmlParser () {
    if (!_htmlParser) {
      _htmlParser = new HTMLParser();
    }
    return _htmlParser
  }
  /**
   * Decorates a DOM node in place with the flags used during conversion:
   * `isBlock`, `isCode` (a <code> element, or inside a parent already flagged
   * as code), `isBlank` and `flankingWhitespace`.
   *
   * @param {Node} node The DOM node to decorate
   * @returns {Node} The same node
   */
  function Node (node) {
    node.isBlock = isBlock(node);

    var isCodeElement = node.nodeName.toLowerCase() === 'code';
    node.isCode = isCodeElement || node.parentNode.isCode;

    node.isBlank = isBlank(node);
    node.flankingWhitespace = flankingWhitespace(node);
    return node
  }
  /**
   * Tells whether a node is blank: its text is only whitespace, it is not a
   * void element, it contains no void element, and its tag is not one of the
   * names that are never treated as blank.
   *
   * @param {Node} node The node to test
   * @returns {Boolean} true when the node is blank
   */
  function isBlank (node) {
    // To have blank tables ignored as well, remove 'TABLE', 'THEAD', 'TBODY'
    // and 'TR' from this list.
    var neverBlank = ['A', 'TABLE','THEAD','TBODY','TR','TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'];

    if (neverBlank.indexOf(node.nodeName) !== -1) return false
    if (!/^\s*$/i.test(node.textContent)) return false
    if (isVoid(node)) return false
    return !hasVoid(node)
  }
  /**
   * Works out the single spaces to put before and after an inline node's
   * replacement. A side gets a space when the node's text has whitespace on
   * that edge and the neighbouring sibling does not already supply it. Block
   * nodes never get flanking whitespace.
   *
   * @param {Node} node A node that already carries its `isBlock` flag
   * @returns {Object} `{ leading, trailing }`, each '' or ' '
   */
  function flankingWhitespace (node) {
    var edges = { leading: '', trailing: '' };
    if (node.isBlock) return edges

    var startsWithSpace = /^[ \r\n\t]/.test(node.textContent);
    var endsWithSpace = /[ \r\n\t]$/.test(node.textContent);

    if (startsWithSpace && !isFlankedByWhitespace('left', node)) {
      edges.leading = ' ';
    }
    if (endsWithSpace && !isFlankedByWhitespace('right', node)) {
      edges.trailing = ' ';
    }

    return edges
  }
  /**
   * Checks whether the sibling on the given side already has a space on the
   * edge that touches the node.
   *
   * @param {String} side 'left' for the previous sibling, anything else for the next
   * @param {Node} node The node whose neighbour is inspected
   * @returns true or false for text and non-block element siblings, undefined
   * when there is no sibling or it is of another kind
   */
  function isFlankedByWhitespace (side, node) {
    var onLeft = side === 'left';
    var sibling = onLeft ? node.previousSibling : node.nextSibling;
    var edgeSpace = onLeft ? / $/ : /^ /;

    if (!sibling) return undefined

    if (sibling.nodeType === 3) {
      return edgeSpace.test(sibling.nodeValue)
    }
    if (sibling.nodeType === 1 && !isBlock(sibling)) {
      return edgeSpace.test(sibling.textContent)
    }
    return undefined
  }
  // Borrowed so that array-likes such as NodeList can be reduced.
  var reduce = [].reduce;

  // Runs of newlines at the very start / very end of a string.
  var leadingNewLinesRegExp = /^\n*/;
  var trailingNewLinesRegExp = /\n*$/;

  // [pattern, replacement] pairs applied in order to escape Markdown syntax.
  // Backslashes come first so that later escapes are not escaped again.
  var escapes = [
    [/\\/g, '\\\\'], // backslash
    [/\*/g, '\\*'], // asterisk
    [/^-/g, '\\-'], // leading hyphen
    [/^\+ /g, '\\+ '], // leading "+ "
    [/^(=+)/g, '\\$1'], // leading run of "="
    [/^(#{1,6}) /g, '\\$1 '], // leading "#" run followed by a space
    [/`/g, '\\`'], // backtick
    [/^~~~/g, '\\~~~'], // leading "~~~"
    [/\[/g, '\\['], // opening square bracket
    [/\]/g, '\\]'], // closing square bracket
    [/^>/g, '\\>'], // leading ">"
    [/_/g, '\\_'], // underscore
    [/^(\d+)\. /g, '$1\\. '] // leading "N. "
  ];
  /**
   * Creates a converter. May be called with or without `new`.
   *
   * @public
   * @param {Object} [options] Overrides for the default options listed below
   */
  function TurndownService (options) {
    if (!(this instanceof TurndownService)) return new TurndownService(options)

    var defaults = {
      rules: rules,
      headingStyle: 'setext',
      hr: '* * *',
      bulletListMarker: '*',
      codeBlockStyle: 'indented',
      fence: '```',
      emDelimiter: '_',
      strongDelimiter: '**',
      linkStyle: 'inlined',
      linkReferenceStyle: 'full',
      // A Markdown hard line break needs at least two trailing spaces; a
      // single space before the newline is only a soft break, so the <br>
      // would be lost.
      br: '  ',
      // Used for nodes flagged as blank.
      blankReplacement: function (content, node) {
        return node.isBlock ? '\n\n' : ''
      },
      // Used for nodes registered through `keep`: their HTML is emitted as is.
      keepReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
      },
      // Used when no rule matches: the converted content passes through.
      defaultReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + content + '\n\n' : content
      }
    };

    this.options = extend({}, defaults, options);
    this.rules = new Rules(this.options);
  }
  TurndownService.prototype = {
    /**
     * Converts an HTML string or DOM node to Markdown
     * @public
     * @param {String|HTMLElement} input The string or DOM node to convert
     * @returns A Markdown representation of the input
     * @type String
     * @throws {TypeError} When the input is neither a string nor an
     * element/document/fragment node
     */
    turndown: function (input) {
      if (!canConvert(input)) {
        throw new TypeError(
          input + ' is not a string, or an element/document/fragment node.'
        )
      }

      if (input === '') return ''

      var rootNode = new RootNode(input);
      var markdown = process.call(this, rootNode);
      return postProcess.call(this, markdown)
    },

    /**
     * Applies one plugin, or every plugin in an array (arrays may be nested)
     * @public
     * @param {Function|Array} plugin The plugin or array of plugins to add
     * @returns The Turndown instance for chaining
     * @type Object
     * @throws {TypeError} When a plugin is neither a function nor an array
     */
    use: function (plugin) {
      if (typeof plugin === 'function') {
        plugin(this);
        return this
      }

      if (!Array.isArray(plugin)) {
        throw new TypeError('plugin must be a Function or an Array of Functions')
      }

      for (var index = 0; index < plugin.length; index++) {
        this.use(plugin[index]);
      }
      return this
    },

    /**
     * Adds a rule ahead of the existing ones
     * @public
     * @param {String} key The unique key of the rule
     * @param {Object} rule The rule
     * @returns The Turndown instance for chaining
     * @type Object
     */
    addRule: function (key, rule) {
      this.rules.add(key, rule);
      return this
    },

    /**
     * Keeps nodes that match the filter as HTML
     * @public
     * @param {String|Array|Function} filter Selects the nodes to keep
     * @returns The Turndown instance for chaining
     * @type Object
     */
    keep: function (filter) {
      this.rules.keep(filter);
      return this
    },

    /**
     * Drops nodes that match the filter from the output
     * @public
     * @param {String|Array|Function} filter Selects the nodes to remove
     * @returns The Turndown instance for chaining
     * @type Object
     */
    remove: function (filter) {
      this.rules.remove(filter);
      return this
    },

    /**
     * Escapes Markdown syntax by applying every escape pattern in order
     * @public
     * @param {String} string The string to escape
     * @returns A string with Markdown syntax escaped
     * @type String
     */
    escape: function (string) {
      var escaped = string;
      for (var index = 0; index < escapes.length; index++) {
        var pair = escapes[index];
        escaped = escaped.replace(pair[0], pair[1]);
      }
      return escaped
    }
  };
  /**
   * Converts the children of a node and joins the results into one
   * Markdown string. Must be called with a TurndownService as `this`.
   * @private
   * @param {HTMLElement} parentNode The node whose children are converted
   * @returns A Markdown representation of the node
   * @type String
   */
  function process (parentNode) {
    var service = this;

    function appendChild (output, child) {
      var node = new Node(child);
      var replacement;

      switch (node.nodeType) {
        case 3:
          // Text inside code is kept verbatim; other text is escaped.
          replacement = node.isCode ? node.nodeValue : service.escape(node.nodeValue);
          break
        case 1:
          replacement = replacementForNode.call(service, node);
          break
        default:
          // Other node types contribute nothing.
          replacement = '';
      }

      return join(output, replacement)
    }

    return reduce.call(parentNode.childNodes, appendChild, '')
  }
  /**
   * Appends whatever each rule's `append` hook returns, then trims the
   * output. Must be called with a TurndownService as `this`.
   * @private
   * @param {String} output The conversion output
   * @returns A trimmed version of the output
   * @type String
   */
  function postProcess (output) {
    var service = this;

    service.rules.forEach(function (rule) {
      if (typeof rule.append !== 'function') return
      output = join(output, rule.append(service.options));
    });

    // Leading tabs/newlines and any trailing whitespace are dropped.
    var withoutLeading = output.replace(/^[\t\r\n]+/, '');
    return withoutLeading.replace(/[\t\r\n\s]+$/, '')
  }
  /**
   * Converts an element node to Markdown: picks its rule, converts its
   * children, and wraps the rule's output in the node's flanking whitespace.
   * Must be called with a TurndownService as `this`.
   * @private
   * @param {HTMLElement} node The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function replacementForNode (node) {
    var rule = this.rules.forNode(node);
    var content = process.call(this, node);
    var flank = node.flankingWhitespace;

    // When the node carries flanking whitespace, the inner content is trimmed.
    var hasFlank = flank.leading || flank.trailing;
    if (hasFlank) content = content.trim();

    var leading = flank.leading;
    var converted = rule.replacement(content, node, this.options);
    return leading + converted + flank.trailing
  }
  /**
   * Determines the newlines to place between the current output and the
   * replacement: the longer of the output's trailing run and the
   * replacement's leading run, capped at two.
   * @private
   * @param {String} output The current conversion output
   * @param {String} replacement The string to append to the output
   * @returns The whitespace to separate the current output and the replacement
   * @type String
   */
  function separatingNewlines (output, replacement) {
    var trailing = output.match(trailingNewLinesRegExp)[0];
    var leading = replacement.match(leadingNewLinesRegExp)[0];

    // Both runs contain only "\n", so the longer one is also the greater one.
    var longest = leading.length > trailing.length ? leading : trailing;

    return longest.length < 2 ? longest : '\n\n'
  }
  /**
   * Concatenates two strings, replacing the newlines where they meet with
   * the separator chosen by `separatingNewlines`.
   * @private
   * @param {String} string1 The left-hand string
   * @param {String} string2 The right-hand string
   * @returns The joined string
   * @type String
   */
  function join (string1, string2) {
    var separator = separatingNewlines(string1, string2);

    // Remove trailing/leading newlines and replace with separator
    var head = string1.replace(trailingNewLinesRegExp, '');
    var tail = string2.replace(leadingNewLinesRegExp, '');

    return head + separator + tail
  }
  /**
   * Determines whether an input can be converted: it must be a string, or a
   * node whose `nodeType` is 1 (element), 9 (document) or 11 (fragment).
   * @private
   * @param {String|HTMLElement} input The candidate input
   * @returns A truthy value when the input can be converted, a falsy one otherwise
   */
  function canConvert (input) {
    if (input == null) return false
    if (typeof input === 'string') return true

    var type = input.nodeType;
    return type && (type === 1 || type === 9 || type === 11)
  }
  771. return TurndownService;
  772. }());