extract.cljs 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. (ns frontend.handler.extract
  2. "Extract helper."
  3. (:require [frontend.util :as util]
  4. [frontend.db :as db]
  5. [lambdaisland.glogi :as log]
  6. [clojure.set :as set]
  7. [frontend.utf8 :as utf8]
  8. [frontend.date :as date]
  9. [frontend.text :as text]
  10. [frontend.util.property :as property]
  11. [clojure.string :as string]
  12. [frontend.format.mldoc :as mldoc]
  13. [frontend.format.block :as block]
  14. [frontend.format :as format]
  15. [cljs-time.core :as t]
  16. [cljs-time.coerce :as tc]
  17. [medley.core :as medley]
  18. [clojure.walk :as walk]
  19. [frontend.state :as state]
  20. [frontend.config :as config]))
  21. (defn- extract-page-list
  22. [content]
  23. (when-not (string/blank? content)
  24. (->> (re-seq #"\[\[([^\]]+)]]" content)
  25. (map last)
  26. (remove nil?)
  27. (map string/lower-case)
  28. (distinct))))
  29. (defn get-page-name
  30. [file ast]
  31. ;; headline
  32. (let [ast (map first ast)]
  33. (if (string/includes? file "pages/contents.")
  34. "Contents"
  35. (let [first-block (last (first (filter block/heading-block? ast)))
  36. property-name (when (and (contains? #{"Properties" "Property_Drawer"} (ffirst ast))
  37. (not (string/blank? (:title (last (first ast))))))
  38. (:title (last (first ast))))
  39. first-block-name (let [title (last (first (:title first-block)))]
  40. (and first-block
  41. (string? title)
  42. title))
  43. file-name (when-let [file-name (last (string/split file #"/"))]
  44. (first (util/split-last "." file-name)))]
  45. (or property-name
  46. (if (= (state/page-name-order) "heading")
  47. (or first-block-name file-name)
  48. (or file-name first-block-name)))))))
  49. ;; TODO: performance improvement
  50. (defn- extract-pages-and-blocks
  51. [repo-url format ast properties file content utf8-content journal?]
  52. (try
  53. (let [now (tc/to-long (t/now))
  54. page (get-page-name file ast)
  55. [page page-name journal-day] (block/convert-page-if-journal page)
  56. blocks (->> (block/extract-blocks ast content false format)
  57. (block/with-parent-and-left [:block/name (string/lower-case page)]))
  58. ref-pages (atom #{})
  59. ref-tags (atom #{})
  60. blocks (map (fn [block]
  61. (let [block-ref-pages (seq (:block/refs block))
  62. page-lookup-ref [:block/name (string/lower-case page)]
  63. block-path-ref-pages (->> (cons page-lookup-ref (seq (:block/path-refs block)))
  64. (remove nil?))]
  65. (when block-ref-pages
  66. (swap! ref-pages set/union (set block-ref-pages)))
  67. (-> block
  68. (dissoc :ref-pages)
  69. (assoc :block/file {:file/path file}
  70. :block/format format
  71. :block/page [:block/name (string/lower-case page)]
  72. :block/refs block-ref-pages
  73. :block/path-refs block-path-ref-pages))))
  74. blocks)
  75. page-entity (let [page-file? (= page (string/lower-case file))
  76. aliases (and (:alias properties)
  77. (seq (remove #(= page %)
  78. (:alias properties))))
  79. page-list (when-let [list-content (:list properties)]
  80. (extract-page-list list-content))]
  81. (cond->
  82. (util/remove-nils
  83. (assoc
  84. (block/page-name->map page false)
  85. :block/file {:file/path file}))
  86. (seq properties)
  87. (assoc :block/properties properties)
  88. aliases
  89. (assoc :block/alias
  90. (map
  91. (fn [alias]
  92. (let [page-name (string/lower-case alias)
  93. aliases (distinct
  94. (conj
  95. (remove #{alias} aliases)
  96. page))
  97. aliases (if (seq aliases)
  98. (map
  99. (fn [alias]
  100. {:block/name (string/lower-case alias)})
  101. aliases))]
  102. (if (seq aliases)
  103. {:block/name page-name
  104. :block/alias aliases}
  105. {:block/name page-name})))
  106. aliases))
  107. (:tags properties)
  108. (assoc :block/tags (let [tags (->> (:tags properties)
  109. (remove string/blank?))]
  110. (swap! ref-tags set/union (set tags))
  111. (map (fn [tag] {:block/name (string/lower-case tag)
  112. :block/original-name tag})
  113. tags)))))
  114. pages (->> (concat
  115. [page-entity]
  116. @ref-pages
  117. (map
  118. (fn [page]
  119. {:block/original-name page
  120. :block/name (string/lower-case page)})
  121. @ref-tags))
  122. ;; remove block references
  123. (remove vector?))
  124. pages (util/distinct-by :block/name pages)
  125. block-ids (mapv (fn [block]
  126. {:block/uuid (:block/uuid block)})
  127. (remove nil? blocks))
  128. pages (remove nil? pages)
  129. pages (map (fn [page] (assoc page :block/uuid (db/new-block-id))) pages)]
  130. [pages
  131. block-ids
  132. (remove nil? blocks)])
  133. (catch js/Error e
  134. (log/error :exception e))))
  135. (defn extract-blocks-pages
  136. ([repo-url file content]
  137. (extract-blocks-pages repo-url file content (utf8/encode content)))
  138. ([repo-url file content utf8-content]
  139. (if (string/blank? content)
  140. []
  141. (let [journal? (config/journal? file)
  142. format (format/get-format file)
  143. ast (mldoc/->edn content
  144. (mldoc/default-config format))
  145. first-block (ffirst ast)
  146. properties (let [properties (and (property/properties-ast? first-block)
  147. (->> (last first-block)
  148. (map (fn [[x y]]
  149. [x (if (string? y)
  150. (property/parse-property x y)
  151. y)]))
  152. (into {})
  153. (walk/keywordize-keys)))]
  154. (when (and properties (seq properties))
  155. (if (:filters properties)
  156. (update properties :filters
  157. (fn [v]
  158. (string/replace (or v "") "\\" "")))
  159. properties)))]
  160. (extract-pages-and-blocks
  161. repo-url
  162. format ast properties
  163. file content utf8-content journal?)))))
  164. (defn with-block-uuid
  165. [pages]
  166. (->> (util/distinct-by :block/name pages)
  167. (map (fn [page]
  168. (if (:block/uuid page)
  169. page
  170. (assoc page :block/uuid (db/new-block-id)))))))
  171. (defn with-ref-pages
  172. [pages blocks]
  173. (let [ref-pages (->> (mapcat :block/refs blocks)
  174. (filter :block/name))]
  175. (->> (concat pages ref-pages)
  176. (group-by :block/name)
  177. vals
  178. (map (partial apply merge))
  179. (with-block-uuid))))
  180. (defn extract-all-blocks-pages
  181. [repo-url files metadata]
  182. (when (seq files)
  183. (let [result (->> files
  184. (map
  185. (fn [{:file/keys [path content]} contents]
  186. (println "Parsing : " path)
  187. (when content
  188. ;; TODO: remove `text/scheduled-deadline-dash->star` once migration is done
  189. (let [org? (= "org" (string/lower-case (util/get-file-ext path)))]
  190. (let [content (if org?
  191. content
  192. (text/scheduled-deadline-dash->star content))
  193. utf8-content (utf8/encode content)]
  194. (extract-blocks-pages repo-url path content utf8-content))))))
  195. (remove empty?))]
  196. (when (seq result)
  197. (let [[pages block-ids blocks] (apply map concat result)
  198. pages (with-ref-pages pages blocks)
  199. blocks (map (fn [block]
  200. (let [id (:block/uuid block)
  201. properties (get-in metadata [:block/properties id])]
  202. (update block :block/properties merge properties)))
  203. blocks)
  204. ;; To prevent "unique constraint" on datascript
  205. pages-index (map #(select-keys % [:block/name]) pages)
  206. block-ids-set (set (map (fn [{:block/keys [uuid]}] [:block/uuid uuid]) block-ids))
  207. blocks (map (fn [b]
  208. (update b :block/refs
  209. (fn [refs]
  210. (set/union
  211. (filter :block/name refs)
  212. (set/intersection (set refs) block-ids-set))))) blocks)]
  213. (apply concat [pages-index pages block-ids blocks]))))))