html_parser.cljs 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. (ns frontend.extensions.html-parser
  2. (:require [cljs.core.match :refer [match]]
  3. [clojure.string :as string]
  4. [clojure.walk :as walk]
  5. [frontend.config :as config]
  6. [frontend.util :as util]
  7. [hickory.core :as hickory]))
  8. (defonce *inside-pre? (atom false))
  9. (defn- hiccup-without-style
  10. [hiccup]
  11. (walk/postwalk (fn [f]
  12. (if (map? f)
  13. (dissoc f :style)
  14. f)) hiccup))
  15. (defn- export-hiccup
  16. [hiccup]
  17. (util/format "#+BEGIN_EXPORT hiccup\n%s\n#+END_EXPORT"
  18. (str (hiccup-without-style hiccup))))
  19. (defn hiccup->doc-inner
  20. [format hiccup]
  21. (let [transform-fn (fn [hiccup]
  22. (hiccup->doc-inner format hiccup))
  23. block-pattern (config/get-block-pattern format)
  24. map-join (fn [children] (apply str (map transform-fn children)))
  25. block-transform (fn [level children]
  26. (str (apply str (repeat level block-pattern))
  27. " "
  28. (->> (map transform-fn children)
  29. (string/join " "))
  30. "\n"))
  31. emphasis-transform (fn [tag _attrs children]
  32. (let [pattern (cond
  33. (contains? #{:b :strong} tag)
  34. (config/get-bold format)
  35. (contains? #{:i :em} tag)
  36. (config/get-italic format)
  37. (contains? #{:ins} tag)
  38. (config/get-underline format)
  39. (contains? #{:del} tag)
  40. (config/get-strike-through format)
  41. (contains? #{:mark} tag)
  42. (config/get-highlight format)
  43. :else
  44. nil)]
  45. (str pattern (map-join children) pattern)))
  46. wrapper (fn [tag content]
  47. (cond
  48. (contains? #{:p :hr :ul :ol :dl :table :pre :blockquote :aside :canvas
  49. :center :figure :figcaption :fieldset :div :footer
  50. :header} tag)
  51. (str "\n\n" content "\n\n")
  52. (contains? #{:thead :tr :li} tag)
  53. (str content "\n")
  54. :else
  55. content))
  56. single-hiccup-transform
  57. (fn [x]
  58. (cond
  59. (vector? x)
  60. (let [[tag attrs & children] x
  61. result (match tag
  62. :head nil
  63. :h1 (block-transform 1 children)
  64. :h2 (block-transform 2 children)
  65. :h3 (block-transform 3 children)
  66. :h4 (block-transform 4 children)
  67. :h5 (block-transform 5 children)
  68. :h6 (block-transform 6 children)
  69. :a (let [href (:href attrs)
  70. label (map-join children)
  71. has-img-tag? (util/safe-re-find #"\[:img" (str x))]
  72. (if has-img-tag?
  73. (export-hiccup x)
  74. (case format
  75. :markdown (util/format "[%s](%s)" label href)
  76. :org (util/format "[[%s][%s]]" href label)
  77. nil)))
  78. :img (let [src (:src attrs)
  79. alt (:alt attrs)]
  80. (case format
  81. :markdown (util/format "![%s](%s)" alt src)
  82. :org (util/format "[[%s][%s]]" src alt)
  83. nil))
  84. :p (util/format "%s"
  85. (map-join children))
  86. :hr (config/get-hr format)
  87. (_ :guard #(contains? #{:b :strong
  88. :i :em
  89. :ins
  90. :del
  91. :mark} %))
  92. (emphasis-transform tag attrs children)
  93. :code (if @*inside-pre?
  94. (map-join children)
  95. (let [pattern (config/get-code format)]
  96. (str " "
  97. (str pattern (first children) pattern)
  98. " ")))
  99. :pre
  100. (do
  101. (reset! *inside-pre? true)
  102. (let [content (string/trim (doall (map-join children)))]
  103. (reset! *inside-pre? false)
  104. (case format
  105. :markdown (if (util/starts-with? content "```")
  106. content
  107. (str "```\n" content "\n```"))
  108. :org (if (util/starts-with? content "#+BEGIN_SRC")
  109. content
  110. (util/format "#+BEGIN_SRC\n%s\n#+END_SRC" content))
  111. nil)))
  112. :blockquote
  113. (case format
  114. :markdown (str "> " (map-join children))
  115. :org (util/format "#+BEGIN_QUOTE\n%s\n#+END_QUOTE" (map-join children))
  116. nil)
  117. :li
  118. (str "- " (map-join children))
  119. :dt
  120. (case format
  121. :org (str "- " (map-join children) " ")
  122. :markdown (str (map-join children) "\n")
  123. nil)
  124. :dd
  125. (case format
  126. :markdown (str ": " (map-join children) "\n")
  127. :org (str ":: " (map-join children) "\n")
  128. nil)
  129. :thead
  130. (case format
  131. :markdown (let [columns (count (last (first children)))]
  132. (str
  133. (map-join children)
  134. (str "| " (string/join " | "
  135. (repeat columns "----"))
  136. " |")))
  137. :org (let [columns (count (last (first children)))]
  138. (str
  139. (map-join children)
  140. (str "|" (string/join "+"
  141. (repeat columns "----"))
  142. "|")))
  143. nil)
  144. :tr
  145. (str "| "
  146. (->> (map transform-fn children)
  147. (string/join " | "))
  148. " |")
  149. (_ :guard #(contains? #{:aside :center :figure :figcaption :fieldset :footer :header} %))
  150. (export-hiccup x)
  151. :else (map-join children))]
  152. (wrapper tag result))
  153. (string? x)
  154. x
  155. :else
  156. (println "hiccup->doc error: " x)))
  157. result (if (vector? (first hiccup))
  158. (for [x hiccup]
  159. (single-hiccup-transform x))
  160. (single-hiccup-transform hiccup))]
  161. (apply str result)))
  162. (defn hiccup->doc
  163. [format hiccup]
  164. (let [s (hiccup->doc-inner format hiccup)]
  165. (if (string/blank? s)
  166. ""
  167. (-> s
  168. (string/trim)
  169. (string/replace "\n\n\n\n" "\n\n")
  170. (string/replace "\n\n\n" "\n\n")))))
  171. (defn html-decode-hiccup
  172. [hiccup]
  173. (walk/postwalk (fn [f]
  174. (if (string? f)
  175. (goog.string.unescapeEntities f)
  176. f)) hiccup))
  177. (defn parse
  178. [format html]
  179. (when-not (string/blank? html)
  180. (let [hiccup (hickory/as-hiccup (hickory/parse html))
  181. decoded-hiccup (html-decode-hiccup hiccup)]
  182. (hiccup->doc format decoded-hiccup))))
  183. (comment
  184. ;; | Syntax | Description | Test Text |``
  185. ;; | :--- | :----: | ---: |
  186. ;; | Header | Title | Here's this |
  187. ;; | Paragraph | Text | And more |
  188. (def img-link
  189. [:a {:href "https://www.markdownguide.org/book/", :style "box-sizing: border-box; color: rgb(0, 123, 255); text-decoration: none; background-color: transparent;"} [:img {:src "https://d33wubrfki0l68.cloudfront.net/cb41dd8e38b0543a305f9c56db89b46caa802263/25192/assets/images/book-cover.jpg", :class "card-img", :alt "Markdown Guide book cover", :style "box-sizing: border-box; vertical-align: middle; border-style: none; flex-shrink: 0; width: 205.75px; border-radius: calc(0.25rem - 1px);"}]]))