turndown.js 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974
  1. var TurndownService = (function () {
  2. 'use strict';
  3. function extend (destination) {
  4. for (var i = 1; i < arguments.length; i++) {
  5. var source = arguments[i];
  6. for (var key in source) {
  7. if (source.hasOwnProperty(key)) destination[key] = source[key];
  8. }
  9. }
  10. return destination
  11. }
  12. function repeat (character, count) {
  13. return Array(count + 1).join(character)
  14. }
  15. function trimLeadingNewlines (string) {
  16. return string.replace(/^\n*/, '')
  17. }
  18. function trimTrailingNewlines (string) {
  19. // avoid match-at-end regexp bottleneck, see #370
  20. var indexEnd = string.length;
  21. while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
  22. return string.substring(0, indexEnd)
  23. }
  24. var blockElements = [
  25. 'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
  26. 'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
  27. 'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
  28. 'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
  29. 'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
  30. 'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
  31. ];
  32. function isBlock (node) {
  33. return is(node, blockElements)
  34. }
  35. var voidElements = [
  36. 'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
  37. 'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
  38. ];
  39. function isVoid (node) {
  40. return is(node, voidElements)
  41. }
  42. function hasVoid (node) {
  43. return has(node, voidElements)
  44. }
  45. var meaningfulWhenBlankElements = [
  46. 'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
  47. 'AUDIO', 'VIDEO'
  48. ];
  49. function isMeaningfulWhenBlank (node) {
  50. return is(node, meaningfulWhenBlankElements)
  51. }
  52. function hasMeaningfulWhenBlank (node) {
  53. return has(node, meaningfulWhenBlankElements)
  54. }
  55. function is (node, tagNames) {
  56. return tagNames.indexOf(node.nodeName) >= 0
  57. }
  58. function has (node, tagNames) {
  59. return (
  60. node.getElementsByTagName &&
  61. tagNames.some(function (tagName) {
  62. return node.getElementsByTagName(tagName).length
  63. })
  64. )
  65. }
  66. var rules = {};
  67. rules.paragraph = {
  68. filter: 'p',
  69. replacement: function (content) {
  70. return '\n\n' + content + '\n\n'
  71. }
  72. };
  73. rules.lineBreak = {
  74. filter: 'br',
  75. replacement: function (content, node, options) {
  76. return options.br + '\n'
  77. }
  78. };
  79. rules.heading = {
  80. filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
  81. replacement: function (content, node, options) {
  82. var hLevel = Number(node.nodeName.charAt(1));
  83. if (options.headingStyle === 'setext' && hLevel < 3) {
  84. var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
  85. return (
  86. '\n\n' + content + '\n' + underline + '\n\n'
  87. )
  88. } else {
  89. return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
  90. }
  91. }
  92. };
  93. rules.blockquote = {
  94. filter: 'blockquote',
  95. replacement: function (content) {
  96. content = content.replace(/^\n+|\n+$/g, '');
  97. content = content.replace(/^/gm, '> ');
  98. return '\n\n' + content + '\n\n'
  99. }
  100. };
  101. rules.list = {
  102. filter: ['ul', 'ol'],
  103. replacement: function (content, node) {
  104. var parent = node.parentNode;
  105. if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
  106. return '\n' + content
  107. } else {
  108. return '\n\n' + content + '\n\n'
  109. }
  110. }
  111. };
  112. rules.listItem = {
  113. filter: 'li',
  114. replacement: function (content, node, options) {
  115. content = content
  116. .replace(/^\n+/, '') // remove leading newlines
  117. .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
  118. .replace(/\n/gm, '\n '); // indent
  119. var prefix = options.bulletListMarker + ' ';
  120. var parent = node.parentNode;
  121. if (parent.nodeName === 'OL') {
  122. var start = parent.getAttribute('start');
  123. var index = Array.prototype.indexOf.call(parent.children, node);
  124. prefix = (start ? Number(start) + index : index + 1) + '. ';
  125. }
  126. return (
  127. prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
  128. )
  129. }
  130. };
  131. rules.indentedCodeBlock = {
  132. filter: function (node, options) {
  133. return (
  134. options.codeBlockStyle === 'indented' &&
  135. node.nodeName === 'PRE' &&
  136. node.firstChild &&
  137. node.firstChild.nodeName === 'CODE'
  138. )
  139. },
  140. replacement: function (content, node, options) {
  141. return (
  142. '\n\n ' +
  143. node.firstChild.textContent.replace(/\n/g, '\n ') +
  144. '\n\n'
  145. )
  146. }
  147. };
  148. rules.fencedCodeBlock = {
  149. filter: function (node, options) {
  150. return (
  151. options.codeBlockStyle === 'fenced' &&
  152. node.nodeName === 'PRE' &&
  153. node.firstChild &&
  154. node.firstChild.nodeName === 'CODE'
  155. )
  156. },
  157. replacement: function (content, node, options) {
  158. var className = node.firstChild.getAttribute('class') || '';
  159. var language = (className.match(/language-(\S+)/) || [null, ''])[1];
  160. var code = node.firstChild.textContent;
  161. var fenceChar = options.fence.charAt(0);
  162. var fenceSize = 3;
  163. var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
  164. var match;
  165. while ((match = fenceInCodeRegex.exec(code))) {
  166. if (match[0].length >= fenceSize) {
  167. fenceSize = match[0].length + 1;
  168. }
  169. }
  170. var fence = repeat(fenceChar, fenceSize);
  171. return (
  172. '\n\n' + fence + language + '\n' +
  173. code.replace(/\n$/, '') +
  174. '\n' + fence + '\n\n'
  175. )
  176. }
  177. };
  178. rules.horizontalRule = {
  179. filter: 'hr',
  180. replacement: function (content, node, options) {
  181. return '\n\n' + options.hr + '\n\n'
  182. }
  183. };
  184. rules.inlineLink = {
  185. filter: function (node, options) {
  186. return (
  187. options.linkStyle === 'inlined' &&
  188. node.nodeName === 'A' &&
  189. node.getAttribute('href')
  190. )
  191. },
  192. replacement: function (content, node) {
  193. var href = node.getAttribute('href');
  194. if (href) href = href.replace(/([()])/g, '\\$1');
  195. var title = cleanAttribute(node.getAttribute('title'));
  196. if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
  197. return '[' + content + '](' + href + title + ')'
  198. }
  199. };
  200. rules.referenceLink = {
  201. filter: function (node, options) {
  202. return (
  203. options.linkStyle === 'referenced' &&
  204. node.nodeName === 'A' &&
  205. node.getAttribute('href')
  206. )
  207. },
  208. replacement: function (content, node, options) {
  209. var href = node.getAttribute('href');
  210. var title = cleanAttribute(node.getAttribute('title'));
  211. if (title) title = ' "' + title + '"';
  212. var replacement;
  213. var reference;
  214. switch (options.linkReferenceStyle) {
  215. case 'collapsed':
  216. replacement = '[' + content + '][]';
  217. reference = '[' + content + ']: ' + href + title;
  218. break
  219. case 'shortcut':
  220. replacement = '[' + content + ']';
  221. reference = '[' + content + ']: ' + href + title;
  222. break
  223. default:
  224. var id = this.references.length + 1;
  225. replacement = '[' + content + '][' + id + ']';
  226. reference = '[' + id + ']: ' + href + title;
  227. }
  228. this.references.push(reference);
  229. return replacement
  230. },
  231. references: [],
  232. append: function (options) {
  233. var references = '';
  234. if (this.references.length) {
  235. references = '\n\n' + this.references.join('\n') + '\n\n';
  236. this.references = []; // Reset references
  237. }
  238. return references
  239. }
  240. };
  241. rules.emphasis = {
  242. filter: ['em', 'i'],
  243. replacement: function (content, node, options) {
  244. if (!content.trim()) return ''
  245. return options.emDelimiter + content + options.emDelimiter
  246. }
  247. };
  248. rules.strong = {
  249. filter: ['strong', 'b'],
  250. replacement: function (content, node, options) {
  251. if (!content.trim()) return ''
  252. return options.strongDelimiter + content + options.strongDelimiter
  253. }
  254. };
  255. rules.code = {
  256. filter: function (node) {
  257. var hasSiblings = node.previousSibling || node.nextSibling;
  258. var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
  259. return node.nodeName === 'CODE' && !isCodeBlock
  260. },
  261. replacement: function (content) {
  262. if (!content) return ''
  263. content = content.replace(/\r?\n|\r/g, ' ');
  264. var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
  265. var delimiter = '`';
  266. var matches = content.match(/`+/gm) || [];
  267. while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
  268. return delimiter + extraSpace + content + extraSpace + delimiter
  269. }
  270. };
  271. rules.image = {
  272. filter: 'img',
  273. replacement: function (content, node) {
  274. var alt = cleanAttribute(node.getAttribute('alt'));
  275. var src = node.getAttribute('src') || '';
  276. var title = cleanAttribute(node.getAttribute('title'));
  277. var titlePart = title ? ' "' + title + '"' : '';
  278. return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
  279. }
  280. };
  281. function cleanAttribute (attribute) {
  282. return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''
  283. }
  284. /**
  285. * Manages a collection of rules used to convert HTML to Markdown
  286. */
  287. function Rules (options) {
  288. this.options = options;
  289. this._keep = [];
  290. this._remove = [];
  291. this.blankRule = {
  292. replacement: options.blankReplacement
  293. };
  294. this.keepReplacement = options.keepReplacement;
  295. this.defaultRule = {
  296. replacement: options.defaultReplacement
  297. };
  298. this.array = [];
  299. for (var key in options.rules) this.array.push(options.rules[key]);
  300. }
  301. Rules.prototype = {
  302. add: function (key, rule) {
  303. this.array.unshift(rule);
  304. },
  305. keep: function (filter) {
  306. this._keep.unshift({
  307. filter: filter,
  308. replacement: this.keepReplacement
  309. });
  310. },
  311. remove: function (filter) {
  312. this._remove.unshift({
  313. filter: filter,
  314. replacement: function () {
  315. return ''
  316. }
  317. });
  318. },
  319. forNode: function (node) {
  320. if (node.isBlank) return this.blankRule
  321. var rule;
  322. if ((rule = findRule(this.array, node, this.options))) return rule
  323. if ((rule = findRule(this._keep, node, this.options))) return rule
  324. if ((rule = findRule(this._remove, node, this.options))) return rule
  325. return this.defaultRule
  326. },
  327. forEach: function (fn) {
  328. for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
  329. }
  330. };
  331. function findRule (rules, node, options) {
  332. for (var i = 0; i < rules.length; i++) {
  333. var rule = rules[i];
  334. if (filterValue(rule, node, options)) return rule
  335. }
  336. return void 0
  337. }
  338. function filterValue (rule, node, options) {
  339. var filter = rule.filter;
  340. if (typeof filter === 'string') {
  341. if (filter === node.nodeName.toLowerCase()) return true
  342. } else if (Array.isArray(filter)) {
  343. if (filter.indexOf(node.nodeName.toLowerCase()) > -1) return true
  344. } else if (typeof filter === 'function') {
  345. if (filter.call(rule, node, options)) return true
  346. } else {
  347. throw new TypeError('`filter` needs to be a string, array, or function')
  348. }
  349. }
  350. /**
  351. * The collapseWhitespace function is adapted from collapse-whitespace
  352. * by Luc Thevenard.
  353. *
  354. * The MIT License (MIT)
  355. *
  356. * Copyright (c) 2014 Luc Thevenard <[email protected]>
  357. *
  358. * Permission is hereby granted, free of charge, to any person obtaining a copy
  359. * of this software and associated documentation files (the "Software"), to deal
  360. * in the Software without restriction, including without limitation the rights
  361. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  362. * copies of the Software, and to permit persons to whom the Software is
  363. * furnished to do so, subject to the following conditions:
  364. *
  365. * The above copyright notice and this permission notice shall be included in
  366. * all copies or substantial portions of the Software.
  367. *
  368. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  369. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  370. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  371. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  372. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  373. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  374. * THE SOFTWARE.
  375. */
  376. /**
  377. * collapseWhitespace(options) removes extraneous whitespace from an the given element.
  378. *
  379. * @param {Object} options
  380. */
  381. function collapseWhitespace (options) {
  382. var element = options.element;
  383. var isBlock = options.isBlock;
  384. var isVoid = options.isVoid;
  385. var isPre = options.isPre || function (node) {
  386. return node.nodeName === 'PRE'
  387. };
  388. if (!element.firstChild || isPre(element)) return
  389. var prevText = null;
  390. var keepLeadingWs = false;
  391. var prev = null;
  392. var node = next(prev, element, isPre);
  393. while (node !== element) {
  394. if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
  395. var text = node.data.replace(/[ \r\n\t]+/g, ' ');
  396. if ((!prevText || / $/.test(prevText.data)) &&
  397. !keepLeadingWs && text[0] === ' ') {
  398. text = text.substr(1);
  399. }
  400. // `text` might be empty at this point.
  401. if (!text) {
  402. node = remove(node);
  403. continue
  404. }
  405. node.data = text;
  406. prevText = node;
  407. } else if (node.nodeType === 1) { // Node.ELEMENT_NODE
  408. if (isBlock(node) || node.nodeName === 'BR') {
  409. if (prevText) {
  410. prevText.data = prevText.data.replace(/ $/, '');
  411. }
  412. prevText = null;
  413. keepLeadingWs = false;
  414. } else if (isVoid(node) || isPre(node)) {
  415. // Avoid trimming space around non-block, non-BR void elements and inline PRE.
  416. prevText = null;
  417. keepLeadingWs = true;
  418. } else if (prevText) {
  419. // Drop protection if set previously.
  420. keepLeadingWs = false;
  421. }
  422. } else {
  423. node = remove(node);
  424. continue
  425. }
  426. var nextNode = next(prev, node, isPre);
  427. prev = node;
  428. node = nextNode;
  429. }
  430. if (prevText) {
  431. prevText.data = prevText.data.replace(/ $/, '');
  432. if (!prevText.data) {
  433. remove(prevText);
  434. }
  435. }
  436. }
  437. /**
  438. * remove(node) removes the given node from the DOM and returns the
  439. * next node in the sequence.
  440. *
  441. * @param {Node} node
  442. * @return {Node} node
  443. */
  444. function remove (node) {
  445. var next = node.nextSibling || node.parentNode;
  446. node.parentNode.removeChild(node);
  447. return next
  448. }
  449. /**
  450. * next(prev, current, isPre) returns the next node in the sequence, given the
  451. * current and previous nodes.
  452. *
  453. * @param {Node} prev
  454. * @param {Node} current
  455. * @param {Function} isPre
  456. * @return {Node}
  457. */
  458. function next (prev, current, isPre) {
  459. if ((prev && prev.parentNode === current) || isPre(current)) {
  460. return current.nextSibling || current.parentNode
  461. }
  462. return current.firstChild || current.nextSibling || current.parentNode
  463. }
  464. /*
  465. * Set up window for Node.js
  466. */
  467. var root = (typeof window !== 'undefined' ? window : {});
  468. /*
  469. * Parsing HTML strings
  470. */
  471. function canParseHTMLNatively () {
  472. var Parser = root.DOMParser;
  473. var canParse = false;
  474. // Adapted from https://gist.github.com/1129031
  475. // Firefox/Opera/IE throw errors on unsupported types
  476. try {
  477. // WebKit returns null on unsupported types
  478. if (new Parser().parseFromString('', 'text/html')) {
  479. canParse = true;
  480. }
  481. } catch (e) {}
  482. return canParse
  483. }
  484. function createHTMLParser () {
  485. var Parser = function () {};
  486. {
  487. if (shouldUseActiveX()) {
  488. Parser.prototype.parseFromString = function (string) {
  489. var doc = new window.ActiveXObject('htmlfile');
  490. doc.designMode = 'on'; // disable on-page scripts
  491. doc.open();
  492. doc.write(string);
  493. doc.close();
  494. return doc
  495. };
  496. } else {
  497. Parser.prototype.parseFromString = function (string) {
  498. var doc = document.implementation.createHTMLDocument('');
  499. doc.open();
  500. doc.write(string);
  501. doc.close();
  502. return doc
  503. };
  504. }
  505. }
  506. return Parser
  507. }
  508. function shouldUseActiveX () {
  509. var useActiveX = false;
  510. try {
  511. document.implementation.createHTMLDocument('').open();
  512. } catch (e) {
  513. if (root.ActiveXObject) useActiveX = true;
  514. }
  515. return useActiveX
  516. }
  517. var HTMLParser = canParseHTMLNatively() ? root.DOMParser : createHTMLParser();
  518. function RootNode (input, options) {
  519. var root;
  520. if (typeof input === 'string') {
  521. var doc = htmlParser().parseFromString(
  522. // DOM parsers arrange elements in the <head> and <body>.
  523. // Wrapping in a custom element ensures elements are reliably arranged in
  524. // a single element.
  525. '<x-turndown id="turndown-root">' + input + '</x-turndown>',
  526. 'text/html'
  527. );
  528. root = doc.getElementById('turndown-root');
  529. } else {
  530. root = input.cloneNode(true);
  531. }
  532. collapseWhitespace({
  533. element: root,
  534. isBlock: isBlock,
  535. isVoid: isVoid,
  536. isPre: options.preformattedCode ? isPreOrCode : null
  537. });
  538. return root
  539. }
  540. var _htmlParser;
  541. function htmlParser () {
  542. _htmlParser = _htmlParser || new HTMLParser();
  543. return _htmlParser
  544. }
  545. function isPreOrCode (node) {
  546. return node.nodeName === 'PRE' || node.nodeName === 'CODE'
  547. }
  548. function Node (node, options) {
  549. node.isBlock = isBlock(node);
  550. node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
  551. node.isBlank = isBlank(node);
  552. node.flankingWhitespace = flankingWhitespace(node, options);
  553. return node
  554. }
  555. function isBlank (node) {
  556. return (
  557. !isVoid(node) &&
  558. !isMeaningfulWhenBlank(node) &&
  559. /^\s*$/i.test(node.textContent) &&
  560. !hasVoid(node) &&
  561. !hasMeaningfulWhenBlank(node)
  562. )
  563. }
  564. function flankingWhitespace (node, options) {
  565. if (node.isBlock || (options.preformattedCode && node.isCode)) {
  566. return { leading: '', trailing: '' }
  567. }
  568. var edges = edgeWhitespace(node.textContent);
  569. // abandon leading ASCII WS if left-flanked by ASCII WS
  570. if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
  571. edges.leading = edges.leadingNonAscii;
  572. }
  573. // abandon trailing ASCII WS if right-flanked by ASCII WS
  574. if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
  575. edges.trailing = edges.trailingNonAscii;
  576. }
  577. return { leading: edges.leading, trailing: edges.trailing }
  578. }
  579. function edgeWhitespace (string) {
  580. var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
  581. return {
  582. leading: m[1], // whole string for whitespace-only strings
  583. leadingAscii: m[2],
  584. leadingNonAscii: m[3],
  585. trailing: m[4], // empty for whitespace-only strings
  586. trailingNonAscii: m[5],
  587. trailingAscii: m[6]
  588. }
  589. }
  590. function isFlankedByWhitespace (side, node, options) {
  591. var sibling;
  592. var regExp;
  593. var isFlanked;
  594. if (side === 'left') {
  595. sibling = node.previousSibling;
  596. regExp = / $/;
  597. } else {
  598. sibling = node.nextSibling;
  599. regExp = /^ /;
  600. }
  601. if (sibling) {
  602. if (sibling.nodeType === 3) {
  603. isFlanked = regExp.test(sibling.nodeValue);
  604. } else if (options.preformattedCode && sibling.nodeName === 'CODE') {
  605. isFlanked = false;
  606. } else if (sibling.nodeType === 1 && !isBlock(sibling)) {
  607. isFlanked = regExp.test(sibling.textContent);
  608. }
  609. }
  610. return isFlanked
  611. }
  612. var reduce = Array.prototype.reduce;
  613. var escapes = [
  614. [/\\/g, '\\\\'],
  615. [/\*/g, '\\*'],
  616. [/^-/g, '\\-'],
  617. [/^\+ /g, '\\+ '],
  618. [/^(=+)/g, '\\$1'],
  619. [/^(#{1,6}) /g, '\\$1 '],
  620. [/`/g, '\\`'],
  621. [/^~~~/g, '\\~~~'],
  622. [/\[/g, '\\['],
  623. [/\]/g, '\\]'],
  624. [/^>/g, '\\>'],
  625. [/_/g, '\\_'],
  626. [/^(\d+)\. /g, '$1\\. ']
  627. ];
  628. function TurndownService (options) {
  629. if (!(this instanceof TurndownService)) return new TurndownService(options)
  630. var defaults = {
  631. rules: rules,
  632. headingStyle: 'setext',
  633. hr: '* * *',
  634. bulletListMarker: '*',
  635. codeBlockStyle: 'indented',
  636. fence: '```',
  637. emDelimiter: '_',
  638. strongDelimiter: '**',
  639. linkStyle: 'inlined',
  640. linkReferenceStyle: 'full',
  641. br: ' ',
  642. preformattedCode: false,
  643. blankReplacement: function (content, node) {
  644. return node.isBlock ? '\n\n' : ''
  645. },
  646. keepReplacement: function (content, node) {
  647. return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
  648. },
  649. defaultReplacement: function (content, node) {
  650. return node.isBlock ? '\n\n' + content + '\n\n' : content
  651. }
  652. };
  653. this.options = extend({}, defaults, options);
  654. this.rules = new Rules(this.options);
  655. }
  656. TurndownService.prototype = {
  657. /**
  658. * The entry point for converting a string or DOM node to Markdown
  659. * @public
  660. * @param {String|HTMLElement} input The string or DOM node to convert
  661. * @returns A Markdown representation of the input
  662. * @type String
  663. */
  664. turndown: function (input) {
  665. if (!canConvert(input)) {
  666. throw new TypeError(
  667. input + ' is not a string, or an element/document/fragment node.'
  668. )
  669. }
  670. if (input === '') return ''
  671. var output = process.call(this, new RootNode(input, this.options));
  672. return postProcess.call(this, output)
  673. },
  674. /**
  675. * Add one or more plugins
  676. * @public
  677. * @param {Function|Array} plugin The plugin or array of plugins to add
  678. * @returns The Turndown instance for chaining
  679. * @type Object
  680. */
  681. use: function (plugin) {
  682. if (Array.isArray(plugin)) {
  683. for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
  684. } else if (typeof plugin === 'function') {
  685. plugin(this);
  686. } else {
  687. throw new TypeError('plugin must be a Function or an Array of Functions')
  688. }
  689. return this
  690. },
  691. /**
  692. * Adds a rule
  693. * @public
  694. * @param {String} key The unique key of the rule
  695. * @param {Object} rule The rule
  696. * @returns The Turndown instance for chaining
  697. * @type Object
  698. */
  699. addRule: function (key, rule) {
  700. this.rules.add(key, rule);
  701. return this
  702. },
  703. /**
  704. * Keep a node (as HTML) that matches the filter
  705. * @public
  706. * @param {String|Array|Function} filter The unique key of the rule
  707. * @returns The Turndown instance for chaining
  708. * @type Object
  709. */
  710. keep: function (filter) {
  711. this.rules.keep(filter);
  712. return this
  713. },
  714. /**
  715. * Remove a node that matches the filter
  716. * @public
  717. * @param {String|Array|Function} filter The unique key of the rule
  718. * @returns The Turndown instance for chaining
  719. * @type Object
  720. */
  721. remove: function (filter) {
  722. this.rules.remove(filter);
  723. return this
  724. },
  725. /**
  726. * Escapes Markdown syntax
  727. * @public
  728. * @param {String} string The string to escape
  729. * @returns A string with Markdown syntax escaped
  730. * @type String
  731. */
  732. escape: function (string) {
  733. return escapes.reduce(function (accumulator, escape) {
  734. return accumulator.replace(escape[0], escape[1])
  735. }, string)
  736. }
  737. };
  738. /**
  739. * Reduces a DOM node down to its Markdown string equivalent
  740. * @private
  741. * @param {HTMLElement} parentNode The node to convert
  742. * @returns A Markdown representation of the node
  743. * @type String
  744. */
  745. function process (parentNode) {
  746. var self = this;
  747. return reduce.call(parentNode.childNodes, function (output, node) {
  748. node = new Node(node, self.options);
  749. var replacement = '';
  750. if (node.nodeType === 3) {
  751. replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
  752. } else if (node.nodeType === 1) {
  753. replacement = replacementForNode.call(self, node);
  754. }
  755. return join(output, replacement)
  756. }, '')
  757. }
  758. /**
  759. * Appends strings as each rule requires and trims the output
  760. * @private
  761. * @param {String} output The conversion output
  762. * @returns A trimmed version of the ouput
  763. * @type String
  764. */
  765. function postProcess (output) {
  766. var self = this;
  767. this.rules.forEach(function (rule) {
  768. if (typeof rule.append === 'function') {
  769. output = join(output, rule.append(self.options));
  770. }
  771. });
  772. return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
  773. }
  774. /**
  775. * Converts an element node to its Markdown equivalent
  776. * @private
  777. * @param {HTMLElement} node The node to convert
  778. * @returns A Markdown representation of the node
  779. * @type String
  780. */
  781. function replacementForNode (node) {
  782. var rule = this.rules.forNode(node);
  783. var content = process.call(this, node);
  784. var whitespace = node.flankingWhitespace;
  785. if (whitespace.leading || whitespace.trailing) content = content.trim();
  786. return (
  787. whitespace.leading +
  788. rule.replacement(content, node, this.options) +
  789. whitespace.trailing
  790. )
  791. }
  792. /**
  793. * Joins replacement to the current output with appropriate number of new lines
  794. * @private
  795. * @param {String} output The current conversion output
  796. * @param {String} replacement The string to append to the output
  797. * @returns Joined output
  798. * @type String
  799. */
  800. function join (output, replacement) {
  801. var s1 = trimTrailingNewlines(output);
  802. var s2 = trimLeadingNewlines(replacement);
  803. var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
  804. var separator = '\n\n'.substring(0, nls);
  805. return s1 + separator + s2
  806. }
  807. /**
  808. * Determines whether an input can be converted
  809. * @private
  810. * @param {String|HTMLElement} input Describe this parameter
  811. * @returns Describe what it returns
  812. * @type String|Object|Array|Boolean|Number
  813. */
  814. function canConvert (input) {
  815. return (
  816. input != null && (
  817. typeof input === 'string' ||
  818. (input.nodeType && (
  819. input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
  820. ))
  821. )
  822. )
  823. }
  824. return TurndownService;
  825. }());