webviewexporter.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
#include "webviewexporter.h"

#include <memory>

#include <QWidget>
#include <QWebEnginePage>
#include <QFileInfo>
#include <QTemporaryDir>
#include <QProcess>
#include <QRegularExpression>

#include <widgets/editors/markdownviewer.h>
#include <widgets/editors/markdownvieweradapter.h>
#include <core/editorconfig.h>
#include <core/markdowneditorconfig.h>
#include <core/configmgr.h>
#include <core/htmltemplatehelper.h>
#include <utils/utils.h>
#include <utils/pathutils.h>
#include <utils/fileutils.h>
#include <utils/webutils.h>
#include <utils/processutils.h>
#include <utils/htmlutils.h>
#include <core/file.h>
  21. using namespace vnotex;
  22. static const QString c_imgRegExp = "<img ([^>]*)src=\"(?!data:)([^\"]+)\"([^>]*)>";
  23. WebViewExporter::WebViewExporter(QWidget *p_parent)
  24. : QObject(p_parent)
  25. {
  26. }
  27. WebViewExporter::~WebViewExporter()
  28. {
  29. clear();
  30. }
  31. void WebViewExporter::clear()
  32. {
  33. m_askedToStop = false;
  34. delete m_viewer;
  35. m_viewer = nullptr;
  36. m_htmlTemplate.clear();
  37. m_exportHtmlTemplate.clear();
  38. m_exportOngoing = false;
  39. }
  40. bool WebViewExporter::doExport(const ExportOption &p_option,
  41. const File *p_file,
  42. const QString &p_outputFile)
  43. {
  44. bool ret = false;
  45. m_askedToStop = false;
  46. Q_ASSERT(p_file->getContentType().isMarkdown());
  47. Q_ASSERT(!m_exportOngoing);
  48. m_exportOngoing = true;
  49. m_webViewStates = WebViewState::Started;
  50. auto baseUrl = PathUtils::pathToUrl(p_file->getContentPath());
  51. m_viewer->adapter()->reset();
  52. m_viewer->setHtml(m_htmlTemplate, baseUrl);
  53. auto textContent = p_file->read();
  54. if (p_option.m_targetFormat == ExportFormat::PDF
  55. && p_option.m_pdfOption.m_addTableOfContents
  56. && !p_option.m_pdfOption.m_useWkhtmltopdf) {
  57. // Add `[TOC]` at the beginning.
  58. m_viewer->adapter()->setText("[TOC]\n\n" + textContent);
  59. } else {
  60. m_viewer->adapter()->setText(textContent);
  61. }
  62. while (!isWebViewReady()) {
  63. Utils::sleepWait(100);
  64. if (m_askedToStop) {
  65. goto exit_export;
  66. }
  67. if (isWebViewFailed()) {
  68. qWarning() << "WebView failed when exporting" << p_file->getFilePath();
  69. goto exit_export;
  70. }
  71. }
  72. qDebug() << "WebView is ready";
  73. // Add extra wait to make sure Web side is really ready.
  74. Utils::sleepWait(200);
  75. switch (p_option.m_targetFormat) {
  76. case ExportFormat::HTML:
  77. // TODO: MIME HTML format is not supported yet.
  78. Q_ASSERT(!p_option.m_htmlOption.m_useMimeHtmlFormat);
  79. ret = doExportHtml(p_option.m_htmlOption, p_outputFile, baseUrl);
  80. break;
  81. case ExportFormat::PDF:
  82. if (p_option.m_pdfOption.m_useWkhtmltopdf) {
  83. ret = doExportWkhtmltopdf(p_option.m_pdfOption, p_outputFile, baseUrl);
  84. } else {
  85. ret = doExportPdf(p_option.m_pdfOption, p_outputFile);
  86. }
  87. break;
  88. default:
  89. break;
  90. }
  91. exit_export:
  92. m_exportOngoing = false;
  93. return ret;
  94. }
  95. void WebViewExporter::stop()
  96. {
  97. m_askedToStop = true;
  98. }
  99. bool WebViewExporter::isWebViewReady() const
  100. {
  101. return m_webViewStates == (WebViewState::LoadFinished | WebViewState::WorkFinished);
  102. }
  103. bool WebViewExporter::isWebViewFailed() const
  104. {
  105. return m_webViewStates & WebViewState::Failed;
  106. }
  107. bool WebViewExporter::doExportHtml(const ExportHtmlOption &p_htmlOption,
  108. const QString &p_outputFile,
  109. const QUrl &p_baseUrl)
  110. {
  111. ExportState state = ExportState::Busy;
  112. connect(m_viewer->adapter(), &MarkdownViewerAdapter::contentReady,
  113. this, [&, this](const QString &p_headContent,
  114. const QString &p_styleContent,
  115. const QString &p_content,
  116. const QString &p_bodyClassList) {
  117. qDebug() << "doExportHtml contentReady";
  118. // Maybe unnecessary. Just to avoid duplicated signal connections.
  119. disconnect(m_viewer->adapter(), &MarkdownViewerAdapter::contentReady, this, 0);
  120. if (p_content.isEmpty() || m_askedToStop) {
  121. state = ExportState::Failed;
  122. return;
  123. }
  124. if (!writeHtmlFile(p_outputFile,
  125. p_baseUrl,
  126. p_headContent,
  127. p_styleContent,
  128. p_content,
  129. p_bodyClassList,
  130. p_htmlOption.m_embedStyles,
  131. p_htmlOption.m_completePage,
  132. p_htmlOption.m_embedImages)) {
  133. state = ExportState::Failed;
  134. return;
  135. }
  136. state = ExportState::Finished;
  137. });
  138. m_viewer->adapter()->saveContent();
  139. while (state == ExportState::Busy) {
  140. Utils::sleepWait(100);
  141. if (m_askedToStop) {
  142. break;
  143. }
  144. }
  145. return state == ExportState::Finished;
  146. }
  147. bool WebViewExporter::writeHtmlFile(const QString &p_file,
  148. const QUrl &p_baseUrl,
  149. const QString &p_headContent,
  150. QString p_styleContent,
  151. const QString &p_content,
  152. const QString &p_bodyClassList,
  153. bool p_embedStyles,
  154. bool p_completePage,
  155. bool p_embedImages)
  156. {
  157. const auto baseName = QFileInfo(p_file).completeBaseName();
  158. const QString resourceFolderName = baseName + "_files";
  159. auto resourceFolder = PathUtils::concatenateFilePath(PathUtils::parentDirPath(p_file), resourceFolderName);
  160. qDebug() << "HTML files folder" << resourceFolder;
  161. auto htmlContent = m_exportHtmlTemplate;
  162. const auto title = QStringLiteral("%1").arg(baseName);
  163. HtmlTemplateHelper::fillTitle(htmlContent, title);
  164. if (!p_styleContent.isEmpty() && p_embedStyles) {
  165. embedStyleResources(p_styleContent);
  166. HtmlTemplateHelper::fillStyleContent(htmlContent, p_styleContent);
  167. }
  168. if (!p_headContent.isEmpty()) {
  169. HtmlTemplateHelper::fillHeadContent(htmlContent, p_headContent);
  170. }
  171. if (p_completePage) {
  172. QString content(p_content);
  173. if (p_embedImages) {
  174. embedBodyResources(p_baseUrl, content);
  175. } else {
  176. fixBodyResources(p_baseUrl, resourceFolder, content);
  177. }
  178. HtmlTemplateHelper::fillContent(htmlContent, content);
  179. } else {
  180. HtmlTemplateHelper::fillContent(htmlContent, p_content);
  181. }
  182. if (!p_bodyClassList.isEmpty()) {
  183. HtmlTemplateHelper::fillBodyClassList(htmlContent, p_bodyClassList);
  184. }
  185. FileUtils::writeFile(p_file, htmlContent);
  186. // Delete empty resource folder.
  187. QDir dir(resourceFolder);
  188. if (dir.exists() && dir.isEmpty()) {
  189. dir.cdUp();
  190. dir.rmdir(resourceFolderName);
  191. }
  192. return true;
  193. }
  194. QSize WebViewExporter::pageLayoutSize(const QPageLayout &p_layout) const
  195. {
  196. Q_ASSERT(m_viewer);
  197. auto rect = p_layout.paintRect(QPageLayout::Inch);
  198. return QSize(rect.width() * m_viewer->logicalDpiX(), rect.height() * m_viewer->logicalDpiY());
  199. }
  200. void WebViewExporter::prepare(const ExportOption &p_option)
  201. {
  202. Q_ASSERT(!m_viewer && !m_exportOngoing);
  203. Q_ASSERT(p_option.m_targetFormat == ExportFormat::PDF || p_option.m_targetFormat == ExportFormat::HTML);
  204. {
  205. // Adapter will be managed by MarkdownViewer.
  206. auto adapter = new MarkdownViewerAdapter(this);
  207. m_viewer = new MarkdownViewer(adapter, QColor(), 1, static_cast<QWidget *>(parent()));
  208. m_viewer->hide();
  209. connect(m_viewer->page(), &QWebEnginePage::loadFinished,
  210. this, [this]() {
  211. m_webViewStates |= WebViewState::LoadFinished;
  212. });
  213. connect(adapter, &MarkdownViewerAdapter::workFinished,
  214. this, [this]() {
  215. m_webViewStates |= WebViewState::WorkFinished;
  216. });
  217. }
  218. bool scrollable = true;
  219. if (p_option.m_targetFormat == ExportFormat::PDF
  220. || (p_option.m_targetFormat == ExportFormat::HTML && !p_option.m_htmlOption.m_scrollable)
  221. || (p_option.m_targetFormat == ExportFormat::Custom && !p_option.m_customOption->m_targetPageScrollable)) {
  222. scrollable = false;
  223. }
  224. const auto &config = ConfigMgr::getInst().getEditorConfig().getMarkdownEditorConfig();
  225. bool useWkhtmltopdf = false;
  226. QSize pageBodySize(1024, 768);
  227. if (p_option.m_targetFormat == ExportFormat::PDF) {
  228. useWkhtmltopdf = p_option.m_pdfOption.m_useWkhtmltopdf;
  229. pageBodySize = pageLayoutSize(*(p_option.m_pdfOption.m_layout));
  230. }
  231. qDebug() << "export page body size" << pageBodySize;
  232. HtmlTemplateHelper::MarkdownParas paras;
  233. paras.m_webStyleSheetFile = p_option.m_renderingStyleFile;
  234. paras.m_highlightStyleSheetFile = p_option.m_syntaxHighlightStyleFile;
  235. paras.m_transparentBackgroundEnabled = p_option.m_useTransparentBg;
  236. paras.m_scrollable = scrollable;
  237. paras.m_bodyWidth = pageBodySize.width();
  238. paras.m_bodyHeight = pageBodySize.height();
  239. paras.m_transformSvgToPngEnabled = p_option.m_transformSvgToPngEnabled;
  240. paras.m_mathJaxScale = useWkhtmltopdf ? 2.5 : -1;
  241. paras.m_removeCodeToolBarEnabled = p_option.m_removeCodeToolBarEnabled;
  242. m_htmlTemplate = HtmlTemplateHelper::generateMarkdownViewerTemplate(config, paras);
  243. {
  244. const bool addOutlinePanel = p_option.m_targetFormat == ExportFormat::HTML && p_option.m_htmlOption.m_addOutlinePanel;
  245. m_exportHtmlTemplate = HtmlTemplateHelper::generateMarkdownExportTemplate(config, addOutlinePanel);
  246. }
  247. if (useWkhtmltopdf) {
  248. prepareWkhtmltopdfArguments(p_option.m_pdfOption);
  249. }
  250. }
  251. static QString marginToStrMM(qreal p_margin)
  252. {
  253. return QStringLiteral("%1mm").arg(p_margin);
  254. }
  255. void WebViewExporter::prepareWkhtmltopdfArguments(const ExportPdfOption &p_pdfOption)
  256. {
  257. m_wkhtmltopdfArgs.clear();
  258. // Page layout.
  259. {
  260. const auto &layout = p_pdfOption.m_layout;
  261. m_wkhtmltopdfArgs << "--page-size" << layout->pageSize().key();
  262. m_wkhtmltopdfArgs << "--orientation"
  263. << (layout->orientation() == QPageLayout::Portrait ? "Portrait" : "Landscape");
  264. const auto marginsMM = layout->margins(QPageLayout::Millimeter);
  265. m_wkhtmltopdfArgs << "--margin-bottom" << marginToStrMM(marginsMM.bottom());
  266. m_wkhtmltopdfArgs << "--margin-left" << marginToStrMM(marginsMM.left());
  267. m_wkhtmltopdfArgs << "--margin-right" << marginToStrMM(marginsMM.right());
  268. m_wkhtmltopdfArgs << "--margin-top" << marginToStrMM(marginsMM.top());
  269. // Footer.
  270. m_wkhtmltopdfArgs << "--footer-right" << "[page]"
  271. << "--footer-spacing" << QString::number(marginsMM.bottom() / 3, 'f', 2);
  272. }
  273. m_wkhtmltopdfArgs << "--encoding" << "utf-8";
  274. // Delay 10 seconds for MathJax.
  275. m_wkhtmltopdfArgs << "--javascript-delay" << "5000";
  276. m_wkhtmltopdfArgs << "--enable-local-file-access";
  277. // Append additional global option.
  278. if (!p_pdfOption.m_wkhtmltopdfArgs.isEmpty()) {
  279. m_wkhtmltopdfArgs.append(ProcessUtils::parseCombinedArgString(p_pdfOption.m_wkhtmltopdfArgs));
  280. }
  281. // Must be put after the global object options.
  282. if (p_pdfOption.m_addTableOfContents) {
  283. m_wkhtmltopdfArgs << "toc";
  284. m_wkhtmltopdfArgs << "--toc-text-size-shrink" << "1.0";
  285. m_wkhtmltopdfArgs << "--toc-header-text" << HtmlUtils::unicodeEncode(tr("Table of Contents"));
  286. }
  287. }
  288. bool WebViewExporter::embedStyleResources(QString &p_html) const
  289. {
  290. bool altered = false;
  291. QRegularExpression reg("\\burl\\(\"((file|qrc):[^\"\\)]+)\"\\);");
  292. int pos = 0;
  293. while (pos < p_html.size()) {
  294. QRegularExpressionMatch match;
  295. int idx = p_html.indexOf(reg, pos, &match);
  296. if (idx == -1) {
  297. break;
  298. }
  299. QString dataURI = WebUtils::toDataUri(QUrl(match.captured(1)), false);
  300. if (dataURI.isEmpty()) {
  301. pos = idx + match.capturedLength();
  302. } else {
  303. // Replace the url string in html.
  304. QString newUrl = QStringLiteral("url('%1');").arg(dataURI);
  305. p_html.replace(idx, match.capturedLength(), newUrl);
  306. pos = idx + newUrl.size();
  307. altered = true;
  308. }
  309. }
  310. return altered;
  311. }
  312. bool WebViewExporter::embedBodyResources(const QUrl &p_baseUrl, QString &p_html)
  313. {
  314. bool altered = false;
  315. if (p_baseUrl.isEmpty()) {
  316. return altered;
  317. }
  318. QRegularExpression reg(c_imgRegExp);
  319. int pos = 0;
  320. while (pos < p_html.size()) {
  321. QRegularExpressionMatch match;
  322. int idx = p_html.indexOf(reg, pos, &match);
  323. if (idx == -1) {
  324. break;
  325. }
  326. if (match.captured(2).isEmpty()) {
  327. pos = idx + match.capturedLength();
  328. continue;
  329. }
  330. QUrl srcUrl(p_baseUrl.resolved(match.captured(2)));
  331. const auto dataURI = WebUtils::toDataUri(srcUrl, true);
  332. if (dataURI.isEmpty()) {
  333. pos = idx + match.capturedLength();
  334. } else {
  335. // Replace the url string in html.
  336. QString newUrl = QStringLiteral("<img %1src='%2'%3>").arg(match.captured(1), dataURI, match.captured(3));
  337. p_html.replace(idx, match.capturedLength(), newUrl);
  338. pos = idx + newUrl.size();
  339. altered = true;
  340. }
  341. }
  342. return altered;
  343. }
  344. static QString getResourceRelativePath(const QString &p_file)
  345. {
  346. int idx = p_file.lastIndexOf('/');
  347. int idx2 = p_file.lastIndexOf('/', idx - 1);
  348. Q_ASSERT(idx > 0 && idx2 < idx);
  349. return "." + p_file.mid(idx2);
  350. }
  351. bool WebViewExporter::fixBodyResources(const QUrl &p_baseUrl,
  352. const QString &p_folder,
  353. QString &p_html)
  354. {
  355. bool altered = false;
  356. if (p_baseUrl.isEmpty()) {
  357. return altered;
  358. }
  359. QRegularExpression reg(c_imgRegExp);
  360. int pos = 0;
  361. while (pos < p_html.size()) {
  362. QRegularExpressionMatch match;
  363. int idx = p_html.indexOf(reg, pos, &match);
  364. if (idx == -1) {
  365. break;
  366. }
  367. if (match.captured(2).isEmpty()) {
  368. pos = idx + match.capturedLength();
  369. continue;
  370. }
  371. QUrl srcUrl(p_baseUrl.resolved(match.captured(2)));
  372. QString targetFile = WebUtils::copyResource(srcUrl, p_folder);
  373. if (targetFile.isEmpty()) {
  374. pos = idx + match.capturedLength();
  375. } else {
  376. // Replace the url string in html.
  377. QString newUrl = QStringLiteral("<img %1src=\"%2\"%3>").arg(match.captured(1), getResourceRelativePath(targetFile), match.captured(3));
  378. p_html.replace(idx, match.capturedLength(), newUrl);
  379. pos = idx + newUrl.size();
  380. altered = true;
  381. }
  382. }
  383. return altered;
  384. }
  385. bool WebViewExporter::doExportPdf(const ExportPdfOption &p_pdfOption, const QString &p_outputFile)
  386. {
  387. ExportState state = ExportState::Busy;
  388. m_viewer->page()->printToPdf([&, this](const QByteArray &p_result) {
  389. qDebug() << "doExportPdf printToPdf ready";
  390. if (p_result.isEmpty() || m_askedToStop) {
  391. state = ExportState::Failed;
  392. return;
  393. }
  394. Q_ASSERT(!p_outputFile.isEmpty());
  395. FileUtils::writeFile(p_outputFile, p_result);
  396. state = ExportState::Finished;
  397. }, *p_pdfOption.m_layout);
  398. while (state == ExportState::Busy) {
  399. Utils::sleepWait(100);
  400. if (m_askedToStop) {
  401. break;
  402. }
  403. }
  404. return state == ExportState::Finished;
  405. }
  406. bool WebViewExporter::doExportWkhtmltopdf(const ExportPdfOption &p_pdfOption, const QString &p_outputFile, const QUrl &p_baseUrl)
  407. {
  408. if (p_pdfOption.m_wkhtmltopdfExePath.isEmpty()) {
  409. qWarning() << "invalid wkhtmltopdf executable path";
  410. return false;
  411. }
  412. ExportState state = ExportState::Busy;
  413. connect(m_viewer->adapter(), &MarkdownViewerAdapter::contentReady,
  414. this, [&, this](const QString &p_headContent,
  415. const QString &p_styleContent,
  416. const QString &p_content,
  417. const QString &p_bodyClassList) {
  418. qDebug() << "doExportWkhtmltopdf contentReady";
  419. // Maybe unnecessary. Just to avoid duplicated signal connections.
  420. disconnect(m_viewer->adapter(), &MarkdownViewerAdapter::contentReady, this, 0);
  421. if (p_content.isEmpty() || m_askedToStop) {
  422. state = ExportState::Failed;
  423. return;
  424. }
  425. // Save HTML to a temp dir.
  426. QTemporaryDir tmpDir;
  427. if (!tmpDir.isValid()) {
  428. state = ExportState::Failed;
  429. return;
  430. }
  431. auto tmpHtmlFile = tmpDir.filePath("vnote_export_tmp.html");
  432. if (!writeHtmlFile(tmpHtmlFile,
  433. p_baseUrl,
  434. p_headContent,
  435. p_styleContent,
  436. p_content,
  437. p_bodyClassList,
  438. true,
  439. true,
  440. false)) {
  441. state = ExportState::Failed;
  442. return;
  443. }
  444. // Convert HTML to PDF via wkhtmltopdf.
  445. if (htmlToPdfViaWkhtmltopdf(p_pdfOption, QStringList() << tmpHtmlFile, p_outputFile)) {
  446. state = ExportState::Finished;
  447. } else {
  448. state = ExportState::Failed;
  449. }
  450. });
  451. m_viewer->adapter()->saveContent();
  452. while (state == ExportState::Busy) {
  453. Utils::sleepWait(100);
  454. if (m_askedToStop) {
  455. break;
  456. }
  457. }
  458. return state == ExportState::Finished;
  459. }
  460. bool WebViewExporter::htmlToPdfViaWkhtmltopdf(const ExportPdfOption &p_pdfOption, const QStringList &p_htmlFiles, const QString &p_outputFile)
  461. {
  462. QStringList args(m_wkhtmltopdfArgs);
  463. // Prepare the args.
  464. for (auto const &file : p_htmlFiles) {
  465. // Note: system's locale settings (Language for non-Unicode programs) is important to wkhtmltopdf.
  466. // Input file could be encoded via QUrl::fromLocalFile(p_htmlFile).toString(QUrl::EncodeUnicode) to
  467. // handle non-ASCII path. But for the output file, it is useless.
  468. args << QUrl::fromLocalFile(QDir::toNativeSeparators(file)).toString(QUrl::EncodeUnicode);
  469. }
  470. // To handle non-ASCII path, export it to a temp file and then copy it.
  471. QTemporaryDir tmpDir;
  472. if (!tmpDir.isValid()) {
  473. return false;
  474. }
  475. const auto tmpFile = tmpDir.filePath("vx_tmp_output.pdf");
  476. args << QDir::toNativeSeparators(tmpFile);
  477. bool ret = startProcess(QDir::toNativeSeparators(p_pdfOption.m_wkhtmltopdfExePath), args);
  478. if (ret && QFileInfo::exists(tmpFile)) {
  479. emit logRequested(tr("Copy output file (%1) to (%2).").arg(tmpFile, p_outputFile));
  480. FileUtils::copyFile(tmpFile, p_outputFile);
  481. }
  482. return ret;
  483. }
  484. bool WebViewExporter::startProcess(const QString &p_program, const QStringList &p_args)
  485. {
  486. emit logRequested(p_program + " " + ProcessUtils::combineArgString(p_args));
  487. auto ret = ProcessUtils::start(p_program,
  488. p_args,
  489. [this](const QString &p_log) {
  490. emit logRequested(p_log);
  491. },
  492. m_askedToStop);
  493. return ret == ProcessUtils::State::Succeeded;
  494. }