extract.cljs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. (ns frontend.handler.extract
  2. "Extract helper."
  3. (:require [clojure.set :as set]
  4. [clojure.string :as string]
  5. [clojure.walk :as walk]
  6. [frontend.config :as config]
  7. [frontend.db :as db]
  8. [frontend.format :as format]
  9. [frontend.format.block :as block]
  10. [frontend.format.mldoc :as mldoc]
  11. [frontend.state :as state]
  12. [frontend.text :as text]
  13. [frontend.utf8 :as utf8]
  14. [frontend.util :as util]
  15. [frontend.util.property :as property]
  16. [lambdaisland.glogi :as log]
  17. [promesa.core :as p]
  18. [frontend.mobile.util :as mobile]))
  19. (defn get-page-name
  20. [file ast]
  21. ;; headline
  22. (let [ast (map first ast)]
  23. (if (string/includes? file "pages/contents.")
  24. "Contents"
  25. (let [first-block (last (first (filter block/heading-block? ast)))
  26. property-name (when (and (contains? #{"Properties" "Property_Drawer"} (ffirst ast))
  27. (not (string/blank? (:title (last (first ast))))))
  28. (:title (last (first ast))))
  29. first-block-name (let [title (last (first (:title first-block)))]
  30. (and first-block
  31. (string? title)
  32. title))
  33. file-name (when-let [file-name (last (string/split file #"/"))]
  34. (let [result (first (util/split-last "." file-name))]
  35. (if (config/mldoc-support? (string/lower-case (util/get-file-ext file)))
  36. (string/replace result "." "/")
  37. result)))]
  38. (or property-name
  39. (if (= (state/page-name-order) "heading")
  40. (or first-block-name file-name)
  41. (or file-name first-block-name)))))))
  42. ;; TODO: performance improvement
  43. (defn- extract-pages-and-blocks
  44. #_:clj-kondo/ignore
  45. [repo-url format ast properties file content _utf8-content _journal?]
  46. (try
  47. (let [page (get-page-name file ast)
  48. [_original-page-name page-name _journal-day] (block/convert-page-if-journal page)
  49. blocks (->> (block/extract-blocks ast content false format)
  50. (block/with-parent-and-left {:block/name page-name}))
  51. ref-pages (atom #{})
  52. ref-tags (atom #{})
  53. blocks (map (fn [block]
  54. (let [block-ref-pages (seq (:block/refs block))
  55. page-lookup-ref [:block/name page-name]
  56. block-path-ref-pages (->> (cons page-lookup-ref (seq (:block/path-refs block)))
  57. (remove nil?))]
  58. (when block-ref-pages
  59. (swap! ref-pages set/union (set block-ref-pages)))
  60. (-> block
  61. (dissoc :ref-pages)
  62. (assoc :block/format format
  63. :block/page [:block/name page-name]
  64. :block/refs block-ref-pages
  65. :block/path-refs block-path-ref-pages))))
  66. blocks)
  67. page-entity (let [alias (:alias properties)
  68. alias (if (string? alias) [alias] alias)
  69. aliases (and alias
  70. (seq (remove #(= page-name (util/page-name-sanity-lc %))
  71. alias)))
  72. aliases (->>
  73. (map
  74. (fn [alias]
  75. (let [page-name (util/page-name-sanity-lc alias)
  76. aliases (distinct
  77. (conj
  78. (remove #{alias} aliases)
  79. page))
  80. aliases (when (seq aliases)
  81. (map
  82. (fn [alias]
  83. {:block/name (util/page-name-sanity-lc alias)})
  84. aliases))]
  85. (if (seq aliases)
  86. {:block/name page-name
  87. :block/alias aliases}
  88. {:block/name page-name})))
  89. aliases)
  90. (remove nil?))]
  91. (cond->
  92. (util/remove-nils
  93. (assoc
  94. (block/page-name->map page false)
  95. :block/file {:file/path (util/path-normalize file)}))
  96. (seq properties)
  97. (assoc :block/properties properties)
  98. (seq aliases)
  99. (assoc :block/alias aliases)
  100. (:tags properties)
  101. (assoc :block/tags (let [tags (:tags properties)
  102. tags (if (string? tags) [tags] tags)
  103. tags (remove string/blank? tags)]
  104. (swap! ref-tags set/union (set tags))
  105. (map (fn [tag] {:block/name (util/page-name-sanity-lc tag)
  106. :block/original-name tag})
  107. tags)))))
  108. namespace-pages (let [page (:block/original-name page-entity)]
  109. (when (text/namespace-page? page)
  110. (->> (util/split-namespace-pages page)
  111. (map (fn [page]
  112. (-> (block/page-name->map page true)
  113. (assoc :block/format format)))))))
  114. pages (->> (concat
  115. [page-entity]
  116. @ref-pages
  117. (map
  118. (fn [page]
  119. {:block/original-name page
  120. :block/name (util/page-name-sanity-lc page)})
  121. @ref-tags)
  122. namespace-pages)
  123. ;; remove block references
  124. (remove vector?)
  125. (remove nil?))
  126. pages (util/distinct-by :block/name pages)
  127. pages (remove nil? pages)
  128. pages (map (fn [page] (assoc page :block/uuid (db/new-block-id))) pages)
  129. blocks (->> (remove nil? blocks)
  130. (map (fn [b] (dissoc b :block/title :block/body))))]
  131. [pages blocks])
  132. (catch js/Error e
  133. (log/error :exception e))))
  134. (defn extract-blocks-pages
  135. ([repo-url file content]
  136. (extract-blocks-pages repo-url file content (utf8/encode content)))
  137. ([repo-url file content utf8-content]
  138. (if (string/blank? content)
  139. (p/resolved [])
  140. (p/let [format (format/get-format file)
  141. _ (println "Parsing start : " file)
  142. parse-f (if (and (mobile/is-native-platform?) config/dev?)
  143. mldoc/->edn
  144. (fn [content config]
  145. (mldoc/->edn-async file content config)))
  146. ast (parse-f content (mldoc/default-config format))]
  147. _ (println "Parsing finished : " file)
  148. (let [journal? (config/journal? file)
  149. first-block (ffirst ast)
  150. properties (let [properties (and (property/properties-ast? first-block)
  151. (->> (last first-block)
  152. (map (fn [[x y]]
  153. [x (if (string? y)
  154. (text/parse-property x y)
  155. y)]))
  156. (into {})
  157. (walk/keywordize-keys)))]
  158. (when (and properties (seq properties))
  159. (if (:filters properties)
  160. (update properties :filters
  161. (fn [v]
  162. (string/replace (or v "") "\\" "")))
  163. properties)))]
  164. (extract-pages-and-blocks
  165. repo-url
  166. format ast properties
  167. file content utf8-content journal?))))))
  168. (defn with-block-uuid
  169. [pages]
  170. (->> (util/distinct-by :block/name pages)
  171. (map (fn [page]
  172. (if (:block/uuid page)
  173. page
  174. (assoc page :block/uuid (db/new-block-id)))))))
  175. (defn with-ref-pages
  176. [pages blocks]
  177. (let [ref-pages (->> (mapcat :block/refs blocks)
  178. (filter :block/name))]
  179. (->> (concat pages ref-pages)
  180. (group-by :block/name)
  181. vals
  182. (map (partial apply merge))
  183. (with-block-uuid))))
  184. (defn remove-illegal-refs
  185. [block block-ids-set refresh?]
  186. (let [aux-fn (fn [refs]
  187. (let [block-refs (if refresh? (set refs)
  188. (set/intersection (set refs) block-ids-set))]
  189. (set/union
  190. (set (filter :block/name refs))
  191. block-refs)))]
  192. (-> block
  193. (update :block/refs aux-fn)
  194. (update :block/path-refs aux-fn))))
  195. ;; TODO: refactor with reset-file!
  196. (defn extract-all-blocks-pages
  197. [repo-url files metadata refresh?]
  198. (when (seq files)
  199. (-> (p/all (map
  200. (fn [{:file/keys [path content]}]
  201. (when content
  202. (let [org? (= "org" (string/lower-case (util/get-file-ext path)))
  203. content (if org?
  204. content
  205. (text/scheduled-deadline-dash->star content))
  206. utf8-content (utf8/encode content)]
  207. (extract-blocks-pages repo-url path content utf8-content))))
  208. files))
  209. (p/then (fn [result]
  210. (let [result (remove empty? result)]
  211. (when (seq result)
  212. (let [result (util/distinct-by (fn [[pages _blocks]]
  213. (let [page (first pages)]
  214. (:block/name page))) result)
  215. [pages blocks] (apply map concat result)
  216. block-ids (->> (map :block/uuid blocks)
  217. (remove nil?))
  218. pages (with-ref-pages pages blocks)
  219. blocks (map (fn [block]
  220. (let [id (:block/uuid block)
  221. properties (merge (get-in metadata [:block/properties id])
  222. (:block/properties block))]
  223. (if (seq properties)
  224. (assoc block :block/properties properties)
  225. (dissoc block :block/properties))))
  226. blocks)
  227. ;; To prevent "unique constraint" on datascript
  228. pages-index (map #(select-keys % [:block/name]) pages)
  229. block-ids-set (set (map (fn [uuid] [:block/uuid uuid]) block-ids))
  230. blocks (map #(remove-illegal-refs % block-ids-set refresh?) blocks)
  231. block-ids (map (fn [uuid] {:block/uuid uuid}) block-ids)]
  232. (apply concat [pages-index pages block-ids blocks])))))))))
  233. (defn extract-all-block-refs
  234. [content]
  235. (map second (re-seq #"\(\(([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})\)\)" content)))