浏览代码

Port over frontend.format.block

Moved over util, property and mldoc. Implemented block in prep for
extract-blocks
Gabriel Horner 3 年之前
父节点
当前提交
c1c647e1cb

+ 778 - 0
src/main/logseq/graph_parser/block.cljs

@@ -0,0 +1,778 @@
+(ns logseq.graph-parser.block
+  "Modified version of frontend.format.block"
+  (:require [clojure.string :as string]
+            [clojure.walk :as walk]
+            ; [cljs.core.match :as match]
+            ; [frontend.config :as config]
+            ; [frontend.date :as date]
+            ; [frontend.format :as format]
+            ; [frontend.state :as state]
+            [logseq.graph-parser.text :as text]
+            [frontend.utf8 :as utf8]
+            [logseq.graph-parser.util :as util]
+            [logseq.graph-parser.property :as property]
+            [logseq.graph-parser.mldoc :as mldoc]
+            ; [lambdaisland.glogi :as log]
+            [datascript.core :as d]))
+
+(defn heading-block?
+  "Returns true when `block` is a vector whose first element is the string
+  \"Heading\", false otherwise."
+  [block]
+  (if (vector? block)
+    (= (first block) "Heading")
+    false))
+
+(defn get-tag
+  "Returns the text of a \"Tag\" AST node as one string, or nil when `block`
+  is not a vector whose first element is \"Tag\" (or its value is falsy).
+
+  Each two-element inline node in the tag value contributes:
+    [\"Plain\" s]        -> s
+    [\"Link\" m]         -> (:full_text m)
+    [\"Nested_link\" m]  -> (:content m)
+  Any other element contributes the empty string."
+  [block]
+  (when-let [tag-value (and (vector? block)
+                            (= "Tag" (first block))
+                            (second block))]
+    ;; Plain destructuring stands in for the core.match expression this was
+    ;; ported from, so no extra dependency is needed. Joining the raw AST
+    ;; instead would leak node names such as "Plain" into the tag text.
+    (->> tag-value
+         (map (fn [inline]
+                (if (and (vector? inline) (= 2 (count inline)))
+                  (let [[typ v] inline]
+                    (case typ
+                      "Plain" v
+                      "Link" (:full_text v)
+                      "Nested_link" (:content v)
+                      ""))
+                  "")))
+         (string/join))))
+
+(defn get-page-reference
+  "Extracts the page name referenced by the inline AST node `block`.
+
+  Handles vectors tagged \"Link\", \"Nested_link\", \"Macro\" (only the
+  \"embed\" macro) and \"Tag\"; any other input yields nil for the page.
+  Whatever was found (including nil) is passed through
+  `text/block-ref-un-brackets!` before being returned."
+  [block]
+  (let [page (cond
+               (and (vector? block) (= "Link" (first block)))
+               ;; The link's :url is a [type value] pair; the first matching
+               ;; `and` clause below supplies the page name.
+               (let [typ (first (:url (second block)))
+                     value (second (:url (second block)))]
+                 ;; {:url ["File" "file:../pages/hello_world.org"], :label [["Plain" "hello world"]], :title nil}
+                 (or
+                  (and
+                   (= typ "Page_ref")
+                   (and (string? value)
+                        ;; TODO: Support config
+                        #_(not (or (config/local-asset? value)
+                                   (config/draw? value))))
+                   value)
+
+                  ;; "Search" url that is itself a page ref: return it un-bracketed
+                  (and
+                   (= typ "Search")
+                   (text/page-ref? value)
+                   (text/page-ref-un-brackets! value))
+
+                  ;; Other "Search" urls: accepted unless they start with one of
+                  ;; # * / [ or with an http:, https: or file: prefix.
+                  ;; `_ext` is only needed by the disabled checks below.
+                  (and
+                   (= typ "Search")
+                   (not (contains? #{\# \* \/ \[} (first value)))
+                   (let [_ext (some-> (util/get-file-ext value) keyword)]
+                     (when (and (not (string/starts-with? value "http:"))
+                                (not (string/starts-with? value "https:"))
+                                (not (string/starts-with? value "file:"))
+                                #_(not (config/local-asset? value))
+                                #_(or (= ext :excalidraw)
+                                    (not (contains? (config/supported-formats) ext))))
+                       value)))
+
+                  ;; "Complex" url with the file protocol: use its :link
+                  (and
+                   (= typ "Complex")
+                   (= (:protocol value) "file")
+                   (:link value))
+
+                  ;; "File" url: use the second item of the first :label element
+                  (and
+                   (= typ "File")
+                   (second (first (:label (second block)))))))
+
+               (and (vector? block) (= "Nested_link" (first block)))
+               ;; Drop the first two and last two characters of :content
+               (let [content (:content (last block))]
+                 (subs content 2 (- (count content) 2)))
+
+               (and (vector? block)
+                    (= "Macro" (first block)))
+               ;; Only the "embed" macro references a page; its arguments are
+               ;; re-joined with ", " before un-bracketing.
+               (let [{:keys [name arguments]} (second block)
+                     argument (string/join ", " arguments)]
+                   (when (= name "embed")
+                     (text/page-ref-un-brackets! argument)))
+
+               (and (vector? block)
+                    (= "Tag" (first block)))
+               (let [text (get-tag block)]
+                 (text/page-ref-un-brackets! text))
+
+               :else
+               nil)]
+    (text/block-ref-un-brackets! page)))
+
+(defn get-block-reference
+  "Returns the block id referenced by the AST node `block` when that id passes
+  `util/uuid-string?`; otherwise nil.
+
+  Candidate ids are taken from:
+  - \"Block_reference\" nodes: the last element of the vector
+  - \"Link\" nodes with a map payload: the url value for a \"Block_ref\" url,
+    the url value's :link when its :protocol is \"id\", else the url value
+    passed through `text/block-ref-un-brackets!`
+  - \"Macro\" nodes named \"embed\" whose first argument is a string wrapped
+    in (( and )): the text between them"
+  [block]
+  (let [node-type (when (vector? block) (first block))
+        payload (when (vector? block) (second block))
+        candidate
+        (case node-type
+          "Block_reference"
+          (last block)
+
+          "Link"
+          (when (map? payload)
+            (let [url (:url payload)
+                  url-value (second url)]
+              (cond
+                (= "Block_ref" (first url)) url-value
+                (= "id" (:protocol url-value)) (:link url-value)
+                :else (text/block-ref-un-brackets! url-value))))
+
+          "Macro"
+          (let [{:keys [name arguments]} payload]
+            (when (= name "embed")
+              (let [arg (first arguments)]
+                (when (and (string? arg)
+                           (string/starts-with? arg "((")
+                           (string/ends-with? arg "))"))
+                  (subs arg 2 (- (count arg) 2))))))
+
+          nil)]
+    (when (and candidate (util/uuid-string? candidate))
+      candidate)))
+
+(defn paragraph-block?
+  "Returns true when `block` is a vector whose first element is the string
+  \"Paragraph\", false otherwise."
+  [block]
+  (if (vector? block)
+    (= (first block) "Paragraph")
+    false))
+
+(defn timestamp-block?
+  "Returns true when `block` is a vector whose first element is the string
+  \"Timestamp\", false otherwise."
+  [block]
+  (if (vector? block)
+    (= (first block) "Timestamp")
+    false))
+
+;; TODO: we should move this to mldoc
+(defn extract-properties
+  "Normalizes `properties`, a collection of [key value] pairs, for `format`.
+
+  Returns nil when `properties` is empty, otherwise a map with:
+    :properties        normalized keyword -> value map
+    :properties-order  the normalized keys, in input order
+    :page-refs         non-blank page names found in string values"
+  [format properties]
+  (when (seq properties)
+    (let [properties (seq properties)
+          page-refs (->>
+                     properties
+                     ;; background colour values are not scanned for page refs
+                     (remove (fn [[k _]]
+                               (contains? #{:background-color :background_color} (keyword k))))
+                     (map last)
+                     (map (fn [v]
+                            ;; non-string values yield nil, which the
+                            ;; `apply concat` below treats as empty
+                            (when (and (string? v)
+                                       ;; TODO: Enable mldoc-link?
+                                       #_(not (mldoc/link? format v)))
+                              (let [v (string/trim v)
+                                    result (text/split-page-refs-without-brackets v {:un-brackets? false})]
+                                (if (coll? result)
+                                  (map text/page-ref-un-brackets! result)
+                                  [])))))
+                     (apply concat)
+                     (remove string/blank?))
+          properties (->> properties
+                          (map (fn [[k v]]
+                                 ;; key: lower-cased, spaces and underscores
+                                 ;; become hyphens
+                                 (let [k (-> (string/lower-case (name k))
+                                             (string/replace " " "-")
+                                             (string/replace "_" "-"))
+                                       ;; custom-id is stored as id (underscores
+                                       ;; were already replaced above, so only
+                                       ;; "custom-id" can match here)
+                                       k (if (contains? #{"custom_id" "custom-id"} k)
+                                           "id"
+                                           k)
+                                       ;; collections drop blank items; a blank
+                                       ;; scalar becomes nil; anything else is
+                                       ;; parsed by text/parse-property
+                                       v (if (coll? v)
+                                           (remove string/blank? v)
+                                           (if (string/blank? v)
+                                             nil
+                                             (text/parse-property format k v)))
+                                       k (keyword k)
+                                       ;; a single string for alias/aliases/tags
+                                       ;; is wrapped in a set
+                                       v (if (and
+                                              (string? v)
+                                              (contains? #{:alias :aliases :tags} k))
+                                           (set [v])
+                                           v)
+                                       ;; every collection value ends up as a set
+                                       v (if (coll? v) (set v) v)]
+                                   [k v])))
+                          ;; properties whose value became nil are dropped
+                          (remove #(nil? (second %))))]
+      {:properties (into {} properties)
+       :properties-order (map first properties)
+       :page-refs page-refs})))
+
+(defn- paragraph-timestamp-block?
+  "Returns true when `block` is a paragraph whose first or second inline
+  element is a timestamp node, false otherwise."
+  [block]
+  (if (paragraph-block? block)
+    (let [inlines (second block)]
+      (or (timestamp-block? (first inlines))
+          (timestamp-block? (second inlines))))
+    false))
+
+(defn extract-timestamps
+  "Collects the timestamp nodes among the inline elements of `block` (its
+  second item) into a map, using the last item of each timestamp node as the
+  map entry. Returns nil when `block` has no second item."
+  [block]
+  (when-some [inlines (second block)]
+    (into {}
+          (comp (filter timestamp-block?)
+                (map last))
+          inlines)))
+
+;; {"Deadline" {:date {:year 2020, :month 10, :day 20}, :wday "Tue", :time {:hour 8, :min 0}, :repetition [["DoublePlus"] ["Day"] 1], :active true}}
+(defn timestamps->scheduled-and-deadline
+  "Builds a map with :scheduled and/or :deadline from `timestamps`.
+
+  Keys of `timestamps` are lower-cased and keywordized first. Each selected
+  entry's :date becomes an integer parsed from year, zero-padded month and
+  zero-padded day. :repeated? true is added when an entry has a :repetition.
+  Returns nil when neither key is present."
+  [timestamps]
+  (let [by-keyword (util/map-keys (comp keyword string/lower-case) timestamps)
+        ->entry (fn [[kind {:keys [date repetition]}]]
+                  (let [{:keys [year month day]} date
+                        day-int (js/parseInt (str year (util/zero-pad month) (util/zero-pad day)))]
+                    (cond-> {kind day-int}
+                      repetition (assoc :repeated? true))))]
+    (->> (select-keys by-keyword [:scheduled :deadline])
+         (map ->entry)
+         (apply merge))))
+
+(defn convert-page-if-journal
+  "Converts a journal file name to the user's custom date format.
+
+  Returns a vector [original-page-name page-name journal-day], or nil when
+  `original-page-name` is falsy.
+  NOTE: journal detection is currently disabled (see the TODO below): `day`
+  is hard-coded to false, so the name is returned unchanged together with
+  its `util/page-name-sanity-lc` form and false."
+  [original-page-name]
+  (when original-page-name
+    (let [page-name (util/page-name-sanity-lc original-page-name)
+          ;; TODO: Enable date/* fns
+          day false #_(date/journal-title->int page-name)]
+     (if day
+       ;; not reachable while `day` is stubbed to false
+       (let [original-page-name "" #_(date/int->journal-title day)]
+         [original-page-name (util/page-name-sanity-lc original-page-name) day])
+       [original-page-name page-name day]))))
+
+(defn page-name->map
+  "Creates a page's map structure from an original page name (string).
+   A map as input is supported for legacy compatibility: it is returned
+   as-is when it already has a :block/uuid, and gets a fresh :block/uuid
+   when `with-id?` is truthy. Any other input yields nil.
+   with-id?: assign a new :block/uuid to the page map.
+   with-timestamp?: assign timestamps to the map structure (defaults to
+    true in the two-argument arity).
+    Useful when creating new pages from references or namespaces,
+    as there's no chance to introduce timestamps via editing in page"
+  ([original-page-name with-id?]
+   (page-name->map original-page-name with-id? true))
+  ([original-page-name with-id? with-timestamp?]
+   (cond
+     (and original-page-name (string? original-page-name))
+     (let [original-page-name (util/remove-boundary-slashes original-page-name)
+           [original-page-name page-name journal-day] (convert-page-if-journal original-page-name)
+           namespace? (and (not (boolean (text/get-nested-page-name original-page-name)))
+                           (text/namespace-page? original-page-name))
+           ;; TODO: Pass db down to this fn
+           ;; Until then `some->` starts from nil, so page-entity is always
+           ;; nil and every page is treated as new.
+           page-entity (some-> nil (d/entity [:block/name page-name]))]
+       (merge
+        {:block/name page-name
+         :block/original-name original-page-name}
+        (when with-id?
+          (if page-entity
+            {}
+            {:block/uuid (d/squuid)}))
+        (when namespace?
+          ;; the namespace is the part before the last "/"
+          (let [namespace (first (util/split-last "/" original-page-name))]
+            (when-not (string/blank? namespace)
+              {:block/namespace {:block/name (util/page-name-sanity-lc namespace)}})))
+        (when (and with-timestamp? (not page-entity)) ;; Only assign timestamp on creating new entity
+          ;; TODO: add current time with cljs-core
+          ;; Until then both timestamps are the placeholder 0.
+          (let [current-ms 0 #_(util/time-ms)]
+            {:block/created-at current-ms
+             :block/updated-at current-ms}))
+        (if journal-day
+          {:block/journal? true
+           :block/journal-day journal-day}
+          {:block/journal? false})))
+
+     (and (map? original-page-name) (:block/uuid original-page-name))
+     original-page-name
+
+     (and (map? original-page-name) with-id?)
+     (assoc original-page-name :block/uuid (d/squuid))
+
+     :else
+     nil)))
+
+(defn with-page-refs
+  "Replaces :refs of `block` with page maps for every page it references.
+
+  References come from the block's :tags, :refs, :marker and :priority, from
+  page references and valid tags found while walking :title and :body, and
+  from the namespace pages of those names. Each one is converted with
+  `page-name->map`, passing `with-id?` through."
+  [{:keys [title body tags refs marker priority] :as block} with-id?]
+  (let [refs (->> (concat tags refs [marker priority])
+                  (remove string/blank?)
+                  (distinct))
+        ;; accumulator for the references found during the walk below
+        refs (atom refs)]
+    (walk/prewalk
+     (fn [form]
+       ;; skip custom queries
+       ;; (`when-not` yields nil for them, so prewalk does not descend into
+       ;; their contents)
+       (when-not (and (vector? form)
+                      (= (first form) "Custom")
+                      (= (second form) "query"))
+         (when-let [page (get-page-reference form)]
+           (swap! refs conj page))
+         (when-let [tag (get-tag form)]
+           (let [tag (text/page-ref-un-brackets! tag)]
+             (when (util/tag-valid? tag)
+               (swap! refs conj tag))))
+         form))
+     (concat title body))
+    (let [refs (remove string/blank? @refs)
+          ;; for namespaced names, also reference the pages produced by
+          ;; util/split-namespace-pages
+          children-pages (->> (mapcat (fn [p]
+                                        (let [p (if (map? p)
+                                                  (:block/original-name p)
+                                                  p)]
+                                          (when (string? p)
+                                            (let [p (or (text/get-nested-page-name p) p)]
+                                              (when (text/namespace-page? p)
+                                                (util/split-namespace-pages p))))))
+                                      refs)
+                              (remove string/blank?)
+                              (distinct))
+          refs (->> (distinct (concat refs children-pages))
+                    (remove nil?))
+          refs (map (fn [ref] (page-name->map ref with-id?)) refs)]
+      (assoc block :refs refs))))
+
+(defn with-block-refs
+  "Appends a [:block/uuid id] lookup ref to :refs of `block` for every block
+  reference found while walking its :title and :body, then de-duplicates
+  :refs."
+  [{:keys [title body] :as block}]
+  (let [found-ids (atom nil)
+        collect! (fn [form]
+                   (when-let [id (get-block-reference form)]
+                     (swap! found-ids conj id))
+                   form)]
+    (walk/postwalk collect! (concat title body))
+    (let [lookup-refs (->> @found-ids
+                           (filter util/uuid-string?)
+                           (map (fn [id] [:block/uuid (uuid id)])))]
+      (assoc block :refs (distinct (concat (:refs block) lookup-refs))))))
+
+(defn block-keywordize
+  "Returns `block` with every key that has no namespace moved into the
+  \"block\" namespace; already-namespaced keys are kept as they are."
+  [block]
+  (letfn [(qualify [k]
+            (if (namespace k)
+              k
+              (keyword "block" k)))]
+    (util/map-keys qualify block)))
+
+(defn safe-blocks
+  "Maps over `blocks`: map entries have nils removed (`util/remove-nils`) and
+  are then passed to `block-keywordize`; anything else is left untouched."
+  [blocks]
+  (letfn [(sanitize [block]
+            (if-not (map? block)
+              block
+              (-> block
+                  util/remove-nils
+                  block-keywordize)))]
+    (map sanitize blocks)))
+
+(defn with-path-refs
+  "Adds :block/path-refs to each block in `blocks` and returns them as a
+  vector in the same order.
+
+  Path refs are the block's own :block/refs combined with the :block/refs of
+  the blocks on its parent path, which is tracked through :block/level.
+  Map refs are reduced to their :block/name, names are re-wrapped as
+  {:block/name <sanitized name>}, and vector refs, nils and duplicates are
+  removed."
+  [blocks]
+  (loop [blocks blocks
+         acc []
+         ;; stack of ancestor blocks, ending with the most recent block
+         parents []]
+    (if (empty? blocks)
+      acc
+      (let [block (first blocks)
+            cur-level (:block/level block)
+            ;; level relative to the top of the stack (0 when it is empty)
+            level-diff (- cur-level
+                          (get (last parents) :block/level 0))
+            [path-refs parents]
+            (cond
+              ;; same level: the block replaces the top of the stack
+              (zero? level-diff)            ; sibling
+              (let [path-refs (mapcat :block/refs (drop-last parents))
+                    parents (conj (vec (butlast parents)) block)]
+                [path-refs parents])
+
+              (> level-diff 0)              ; child
+              (let [path-refs (mapcat :block/refs parents)]
+                [path-refs (conj parents block)])
+
+              ;; shallower level: keep only the leading ancestors whose level
+              ;; is lower than this block's
+              (< level-diff 0)              ; new parent
+              (let [parents (vec (take-while (fn [p] (< (:block/level p) cur-level)) parents))
+                    path-refs (mapcat :block/refs parents)]
+                [path-refs (conj parents block)]))
+            path-ref-pages (->> path-refs
+                                (concat (:block/refs block))
+                                (map (fn [ref]
+                                       (cond
+                                         (map? ref)
+                                         (:block/name ref)
+
+                                         :else
+                                         ref)))
+                                (remove string/blank?)
+                                (map (fn [ref]
+                                       (if (string? ref)
+                                         {:block/name (util/page-name-sanity-lc ref)}
+                                         ref)))
+                                ;; NOTE(review): presumably drops block lookup
+                                ;; refs such as [:block/uuid ...] - confirm
+                                (remove vector?)
+                                (remove nil?)
+                                (distinct))]
+        (recur (rest blocks)
+               (conj acc (assoc block :block/path-refs path-ref-pages))
+               parents)))))
+
+(defn block-tags->pages
+  "Replaces each tag in :tags of `block` with a [:block/name name] lookup ref,
+  where name is the un-bracketed tag passed through
+  `util/page-name-sanity-lc`. Blocks without tags are returned unchanged."
+  [{:keys [tags] :as block}]
+  (letfn [(tag->lookup-ref [tag]
+            [:block/name (-> tag
+                             text/page-ref-un-brackets!
+                             util/page-name-sanity-lc)])]
+    (cond-> block
+      (seq tags) (assoc :tags (map tag->lookup-ref tags)))))
+
+(defn- get-block-content
+  "Returns the text content of `block`.
+
+  Uses `block-content` when given; otherwise the slice of `utf8-content`
+  between the :start-pos and :end-pos of the block's :meta (to the end when
+  there is no :end-pos). Level spaces are removed, and indentation is removed
+  too unless the block is a pre-block or its :format is :org. For formats
+  other than :org the result goes through `property/->new-properties`."
+  [utf8-content block format block-content]
+  (let [{:keys [start-pos end-pos]} (:meta block)
+        raw (or block-content
+                (if end-pos
+                  (utf8/substring utf8-content start-pos end-pos)
+                  (utf8/substring utf8-content start-pos)))
+        keep-indentation? (or (:pre-block? block)
+                              (= (:format block) :org))
+        cleaned (when raw
+                  (cond-> (text/remove-level-spaces raw format)
+                    (not keep-indentation?)
+                    (mldoc/remove-indentation-spaces (inc (:level block)) false)))]
+    (cond-> cleaned
+      (not= format :org) property/->new-properties)))
+
+(defn get-custom-id-or-new-id
+  "Returns a uuid for a block: the first truthy of :custom-id, :custom_id or
+  :id under :properties of `properties`, when it is a string that (trimmed)
+  passes `util/uuid-string?`; otherwise a fresh `d/squuid`."
+  [properties]
+  (let [props (get properties :properties)
+        raw-id (some #(get props %) [:custom-id :custom_id :id])
+        trimmed (when (string? raw-id)
+                  (string/trim raw-id))]
+    (if (and trimmed (util/uuid-string? trimmed))
+      (uuid trimmed)
+      (d/squuid))))
+
+(defn get-page-refs-from-properties
+  "Returns page maps (via `page-name->map` with ids) for the pages referenced
+  by the values of `properties`: collection values contribute all of their
+  items, page-ref values contribute their un-bracketed name, and other values
+  contribute nothing. Blank names are skipped."
+  [properties]
+  (letfn [(value->page-names [value]
+            (cond
+              (coll? value) value
+              (text/page-ref? value) [(text/page-ref-un-brackets! value)]
+              :else nil))]
+    (->> (vals properties)
+         (mapcat value->page-names)
+         (remove string/blank?)
+         (map #(page-name->map % true)))))
+
+(defn with-page-block-refs
+  "Runs `block` through `with-page-refs` (with `with-id?`), `with-block-refs`
+  and `block-tags->pages`, then removes nils from :refs. Stops and returns
+  nil as soon as the block or any intermediate result is nil."
+  [block with-id?]
+  (some-> block
+          (with-page-refs with-id?)
+          (with-block-refs)
+          (block-tags->pages)
+          (update :refs (partial remove nil?))))
+
+(defn extract-blocks*
+  "Finishes block extraction.
+
+  When the first block does not start at position 0, or there are no blocks,
+  a pre-block holding the content before the first block is prepended. It is
+  built from the properties in the `pre-block-properties` atom and `body`,
+  and inherits :block/format and :block/page from the first block.
+  :block/body is dropped from every block unless `with-body?` is truthy.
+  Returns the blocks after `with-path-refs`."
+  [blocks body pre-block-properties encoded-content with-body?]
+  (let [first-block (first blocks)
+        first-start (get-in first-block [:block/meta :start-pos])
+        make-pre-block
+        (fn []
+          (let [content (utf8/substring encoded-content 0 first-start)
+                {:keys [properties properties-order]} @pre-block-properties
+                id (get-custom-id-or-new-id {:properties properties})
+                property-refs (map :block/original-name
+                                   (get-page-refs-from-properties properties))
+                pre-block (-> {:uuid id
+                               :content content
+                               :level 1
+                               :meta {:start-pos 0
+                                      :end-pos (or first-start
+                                                   (utf8/length encoded-content))}
+                               :properties properties
+                               :properties-order properties-order
+                               :refs property-refs
+                               :pre-block? true
+                               :unordered true
+                               :body body}
+                              (with-page-block-refs false)
+                              (block-keywordize))]
+            (merge pre-block
+                   (select-keys first-block [:block/format :block/page]))))
+        all-blocks (if (or (> first-start 0)
+                           (empty? blocks))
+                     (cons (make-pre-block) blocks)
+                     blocks)
+        strip-body (fn [block]
+                     (if with-body?
+                       block
+                       (dissoc block :block/body)))]
+    (with-path-refs (map strip-body all-blocks))))
+
+(defn ^:large-vars/cleanup-todo extract-blocks
+  "Converts parsed AST `blocks` for `content` into block maps.
+
+  blocks:     sequence of [ast-node position] pairs; :start_pos is read from
+              the position item
+  content:    the text the AST was parsed from
+  with-id?:   passed on to `with-page-block-refs` for heading blocks
+  format:     content format, e.g. :markdown or :org
+  with-body?: keep :block/body on the results (defaults to false)
+
+  The AST is walked in reverse, so the timestamps, properties and body nodes
+  that follow a heading are collected before the heading itself and attached
+  to it when it is reached. Properties left over once every heading has been
+  consumed are stored for the pre-block. Returns the result of
+  `extract-blocks*`, or nil (after logging) when a js/Error is thrown."
+  ([blocks content with-id? format]
+   (extract-blocks blocks content with-id? format false))
+  ([blocks content with-id? format with-body?]
+   (try
+    (let [encoded-content (utf8/encode content)
+          last-pos (utf8/length encoded-content)
+          pre-block-properties (atom nil)
+          [blocks body]
+          (loop [headings []
+                 blocks (reverse blocks)
+                 timestamps {}
+                 properties {}
+                 last-pos last-pos
+                 last-level 1000
+                 children []
+                 block-all-content []
+                 body []]
+            (if (seq blocks)
+              (let [[block {:keys [start_pos _end_pos] :as block-content}] (first blocks)
+                    block-content (when (string? block-content) block-content)
+                    unordered? (:unordered (second block))
+                    markdown-heading? (and (:size (second block)) (= :markdown format))]
+                (cond
+                  (paragraph-timestamp-block? block)
+                  ;; Merge into the timestamps already collected for the
+                  ;; current heading instead of replacing them.
+                  (let [timestamps' (merge timestamps (extract-timestamps block))]
+                    (recur headings (rest blocks) timestamps' properties last-pos last-level children (conj block-all-content block-content) body))
+
+                  (property/properties-ast? block)
+                  (let [properties (extract-properties format (second block))]
+                    (recur headings (rest blocks) timestamps properties last-pos last-level children (conj block-all-content block-content) body))
+
+                  (heading-block? block)
+                  (let [id (get-custom-id-or-new-id properties)
+                        ref-pages-in-properties (->> (:page-refs properties)
+                                                     (remove string/blank?))
+                        block (second block)
+                        block (if markdown-heading?
+                                (assoc block
+                                       :type :heading
+                                       :level (if unordered? (:level block) 1)
+                                       :heading-level (or (:size block) 6))
+                                block)
+                        level (:level block)
+                        ;; `children` holds [id level] pairs of blocks already
+                        ;; seen; the deeper ones become this block's children.
+                        [children current-block-children]
+                        (cond
+                          (< level last-level)
+                          (let [current-block-children (set (->> (filter #(< level (second %)) children)
+                                                                 (map first)
+                                                                 (map (fn [id]
+                                                                        [:block/uuid id]))))
+                                others (vec (remove #(< level (second %)) children))]
+                            [(conj others [id level])
+                             current-block-children])
+
+                          (>= level last-level)
+                          [(conj children [id level])
+                           #{}])
+                        block (cond->
+                                (assoc block
+                                       :uuid id
+                                       :refs ref-pages-in-properties
+                                       :children (or current-block-children [])
+                                       :format format)
+                                (seq (:properties properties))
+                                (assoc :properties (:properties properties))
+
+                                (seq (:properties-order properties))
+                                (assoc :properties-order (:properties-order properties)))
+                        block (if (get-in block [:properties :collapsed])
+                                (assoc block :collapsed? true)
+                                block)
+                        ;; The block spans from its own start to the start of
+                        ;; the previously processed heading.
+                        block (-> block
+                                  (assoc-in [:meta :start-pos] start_pos)
+                                  (assoc-in [:meta :end-pos] last-pos)
+                                  ((fn [block]
+                                     (assoc block
+                                            :content (get-block-content encoded-content block format (and block-content (string/join "\n" (reverse (conj block-all-content block-content)))))))))
+                        block (if (seq timestamps)
+                                (merge block (timestamps->scheduled-and-deadline timestamps))
+                                block)
+                        block (assoc block :body body)
+                        block (with-page-block-refs block with-id?)
+                        last-pos' (get-in block [:meta :start-pos])
+                        {:keys [created-at updated-at]} (:properties properties)
+                        block (cond-> block
+                                (and created-at (integer? created-at))
+                                (assoc :block/created-at created-at)
+
+                                (and updated-at (integer? updated-at))
+                                (assoc :block/updated-at updated-at))]
+                    ;; Reset the per-heading accumulators for the next heading.
+                    (recur (conj headings block) (rest blocks) {} {} last-pos' (:level block) children [] []))
+
+                  :else
+                  (recur headings (rest blocks) timestamps properties last-pos last-level children
+                         (conj block-all-content block-content)
+                         (conj body block))))
+              (do
+                ;; Properties not consumed by any heading belong to the
+                ;; pre-block built in extract-blocks*.
+                (when (seq properties)
+                  (reset! pre-block-properties properties))
+                [(-> (reverse headings)
+                     safe-blocks) body])))]
+      (extract-blocks* blocks body pre-block-properties encoded-content with-body?))
+    (catch js/Error e
+      ;; Best effort: report the failure together with its cause, return nil.
+      (js/console.error "extract-blocks-failed" e)
+      #_(log/error :exception e)))))
+
+(defn with-parent-and-left
+  "Assigns :block/parent, :block/left and :block/level to each block in
+  `blocks`, walking them in order against a stack of parents that starts with
+  an entry for the page (`page-id`) at level 0.
+
+  Each block's :block/level is copied to a temporary :block/level-spaces key,
+  which drives the comparisons and is removed from the returned blocks."
+  [page-id blocks]
+  (loop [blocks (map (fn [block] (assoc block :block/level-spaces (:block/level block))) blocks)
+         parents [{:page/id page-id     ; db id or a map {:block/name "xxx"}
+                   :block/level 0
+                   :block/level-spaces 0}]
+         ;; carried through the loop but never read
+         _sibling nil
+         result []]
+    (if (empty? blocks)
+      (map #(dissoc % :block/level-spaces) result)
+      (let [[block & others] blocks
+            level-spaces (:block/level-spaces block)
+            ;; uuid, level and parent all come from the top of the stack
+            {:block/keys [uuid level parent] :as last-parent} (last parents)
+            parent-spaces (:block/level-spaces last-parent)
+            [blocks parents sibling result]
+            (cond
+              ;; same depth as the stack top: share its parent, sit to its
+              ;; right and replace it on the stack
+              (= level-spaces parent-spaces)        ; sibling
+              (let [block (assoc block
+                                 :block/parent parent
+                                 :block/left [:block/uuid uuid]
+                                 :block/level level)
+                    parents' (conj (vec (butlast parents)) block)
+                    result' (conj result block)]
+                [others parents' block result'])
+
+              ;; deeper than the stack top: first child of it (or of the page
+              ;; when the top is the page entry, which has no uuid)
+              (> level-spaces parent-spaces)         ; child
+              (let [parent (if uuid [:block/uuid uuid] (:page/id last-parent))
+                    block (cond->
+                            (assoc block
+                                  :block/parent parent
+                                  :block/left parent)
+                            ;; fix block levels with wrong order
+                            ;; For example:
+                            ;;   - a
+                            ;; - b
+                            ;; What if the input indentation is two spaces instead of 4 spaces
+                            ;; NOTE(review): with integer levels this test is
+                            ;; always true inside this branch - confirm
+                            (>= (- level-spaces parent-spaces) 1)
+                            (assoc :block/level (inc level)))
+                    parents' (conj parents block)
+                    result' (conj result block)]
+                [others parents' block result'])
+
+              (< level-spaces parent-spaces)
+              (cond
+                ;; An ancestor sits at exactly this depth: trim the stack and
+                ;; re-queue the block (nothing is added to result here), so
+                ;; the next iteration handles it against the trimmed stack.
+                (some #(= (:block/level-spaces %) (:block/level-spaces block)) parents) ; outdent
+                (let [parents' (vec (filter (fn [p] (<= (:block/level-spaces p) level-spaces)) parents))
+                      left (last parents')
+                      blocks (cons (assoc (first blocks)
+                                          :block/level (dec level)
+                                          :block/left [:block/uuid (:block/uuid left)])
+                                   (rest blocks))]
+                  [blocks parents' left result])
+
+                ;; No ancestor at this depth: split the stack at the first
+                ;; deeper entry, which becomes the block's left sibling and
+                ;; supplies its level.
+                :else
+                (let [[f r] (split-with (fn [p] (<= (:block/level-spaces p) level-spaces)) parents)
+                      left (first r)
+                      parent-id (if-let [block-id (:block/uuid (last f))]
+                                  [:block/uuid block-id]
+                                  page-id)
+                      block (cond->
+                              (assoc block
+                                     :block/parent parent-id
+                                     :block/left [:block/uuid (:block/uuid left)]
+                                     :block/level (:block/level left)
+                                     :block/level-spaces (:block/level-spaces left)))
+
+                      parents' (->> (concat f [block]) vec)
+                      result' (conj result block)]
+                  [others parents' block result'])))]
+        (recur blocks parents sibling result)))))
+;
+; (defn parse-block
+;   ([block]
+;    (parse-block block nil))
+;   ([{:block/keys [uuid content page format] :as block} {:keys [with-id?]
+;                                                         :or {with-id? true}}]
+;    (when-not (string/blank? content)
+;      (let [block (dissoc block :block/pre-block?)
+;            ast (format/to-edn content format nil)
+;            blocks (extract-blocks ast content with-id? format)
+;            new-block (first blocks)
+;            parent-refs (->> (db/get-block-parent (state/get-current-repo) uuid)
+;                             :block/path-refs
+;                             (map :db/id))
+;            {:block/keys [refs]} new-block
+;            ref-pages (filter :block/name refs)
+;            path-ref-pages (->> (concat ref-pages parent-refs [(:db/id page)])
+;                                (remove nil?))
+;            block (cond->
+;                    (merge
+;                     block
+;                     new-block
+;                     {:block/path-refs path-ref-pages})
+;                    (> (count blocks) 1)
+;                    (assoc :block/warning :multiple-blocks))
+;            block (dissoc block :block/title :block/body :block/level)]
+;        (if uuid (assoc block :block/uuid uuid) block)))))
+;
+; (defn parse-title-and-body
+;   ([block]
+;    (when (map? block)
+;      (merge block
+;             (parse-title-and-body (:block/uuid block)
+;                                   (:block/format block)
+;                                   (:block/pre-block? block)
+;                                   (:block/content block)))))
+;   ([block-uuid format pre-block? content]
+;    (when-not (string/blank? content)
+;      (let [content (if pre-block? content
+;                        (str (config/get-block-pattern format) " " (string/triml content)))]
+;        (if-let [result (state/get-block-ast block-uuid content)]
+;          result
+;          (let [ast (->> (format/to-edn content format (mldoc/default-config format))
+;                         (map first))
+;                title (when (heading-block? (first ast))
+;                        (:title (second (first ast))))
+;                body (vec (if title (rest ast) ast))
+;                body (drop-while property/properties-ast? body)
+;                result (cond->
+;                         (if (seq body) {:block/body body} {})
+;                         title
+;                         (assoc :block/title title))]
+;            (state/add-block-ast-cache! block-uuid content result)
+;            result))))))
+;
+; (defn macro-subs
+;   [macro-content arguments]
+;   (loop [s macro-content
+;          args arguments
+;          n 1]
+;     (if (seq args)
+;       (recur
+;        (string/replace s (str "$" n) (first args))
+;        (rest args)
+;        (inc n))
+;       s)))
+;
+; (defn break-line-paragraph?
+;   [[typ break-lines]]
+;   (and (= typ "Paragraph")
+;        (every? #(= % ["Break_Line"]) break-lines)))
+;
+; (defn trim-paragraph-special-break-lines
+;   [ast]
+;   (let [[typ paras] ast]
+;     (if (= typ "Paragraph")
+;       (let [indexed-paras (map-indexed vector paras)]
+;         [typ (->> (filter
+;                             #(let [[index value] %]
+;                                (not (and (> index 0)
+;                                          (= value ["Break_Line"])
+;                                          (contains? #{"Timestamp" "Macro"}
+;                                                     (first (nth paras (dec index)))))))
+;                             indexed-paras)
+;                            (map #(last %)))])
+;       ast)))
+;
+; (defn trim-break-lines!
+;   [ast]
+;   (drop-while break-line-paragraph?
+;               (map trim-paragraph-special-break-lines ast)))

+ 29 - 311
src/main/frontend/handler/graph_parser.cljs → src/main/logseq/graph_parser/cli.cljs

@@ -1,4 +1,5 @@
-(ns frontend.handler.graph-parser
+(ns logseq.graph-parser.cli
+  "Main ns for graph parsing CLI"
   (:require [clojure.string :as string]
             [clojure.set :as set]
             [clojure.edn :as edn]
@@ -7,7 +8,11 @@
             ["fs" :as fs]
             ["child_process" :as child-process]
             [frontend.db-schema :as db-schema]
-            [frontend.format.mldoc-slim :as mldoc]
+            [logseq.graph-parser.mldoc :as mldoc]
+            [logseq.graph-parser.util :as util]
+            [logseq.graph-parser.property :as property]
+            [logseq.graph-parser.text :as text]
+            [logseq.graph-parser.block :as block]
             ;; Disable for now since kondo can't pick it up
             ; #?(:org.babashka/nbb [nbb.core :as nbb])
             [nbb.core :as nbb]))
@@ -47,89 +52,6 @@
 ;; Copied helpers
 ;; ==============
 
-;; from: frontend.util
-;; =====
-(defn remove-nils
-  "remove pairs of key-value that has nil value from a (possibly nested) map."
-  [nm]
-  (walk/postwalk
-   (fn [el]
-     (if (map? el)
-       (into {} (remove (comp nil? second)) el)
-       el))
-   nm))
-
-(defn path-normalize
-  "Normalize file path (for reading paths from FS, not required by writting)"
-  [s]
-  (.normalize s "NFC"))
-
-(defn distinct-by
-  [f col]
-  (reduce
-   (fn [acc x]
-     (if (some #(= (f x) (f %)) acc)
-       acc
-       (vec (conj acc x))))
-   []
-   col))
-
-(defn split-last [pattern s]
-  (when-let [last-index (string/last-index-of s pattern)]
-    [(subs s 0 last-index)
-     (subs s (+ last-index (count pattern)) (count s))]))
-
-(defn get-file-ext
-  [file]
-  (and
-   (string? file)
-   (string/includes? file ".")
-   (some-> (last (string/split file #"\.")) string/lower-case)))
-
-(defn split-namespace-pages
-  [title]
-  (let [parts (string/split title "/")]
-    (loop [others (rest parts)
-           result [(first parts)]]
-      (if (seq others)
-        (let [prev (last result)]
-          (recur (rest others)
-                 (conj result (str prev "/" (first others)))))
-        result))))
-
-(defn remove-boundary-slashes
-  [s]
-  (when (string? s)
-    (let [s (if (= \/ (first s))
-              (subs s 1)
-              s)]
-      (if (= \/ (last s))
-        (subs s 0 (dec (count s)))
-        s))))
-
-(def windows-reserved-chars #"[:\\*\\?\"<>|]+")
-
-(defn page-name-sanity
-  "Sanitize the page-name for file name (strict), for file writting"
-  ([page-name]
-   (page-name-sanity page-name false))
-  ([page-name replace-slash?]
-   (let [page (some-> page-name
-                      (remove-boundary-slashes)
-                      ;; Windows reserved path characters
-                      (string/replace windows-reserved-chars "_")
-                      ;; for android filesystem compatiblity
-                      (string/replace #"[\\#|%]+" "_")
-                      (path-normalize))]
-     (if replace-slash?
-       (string/replace page #"/" ".")
-       page))))
-
-(defn page-name-sanity-lc
-  "Sanitize the query string for a page name (mandate for :block/name)"
-  [s]
-  (page-name-sanity (string/lower-case s)))
-
 ;; from: frontend.db.model
 ;; =====
 (defn get-all-pages
@@ -217,7 +139,7 @@
 
                  :else
                  relative-path)]
-      (path-normalize path))))
+      (util/path-normalize path))))
 
 (def app-name "logseq")
 (def pages-metadata-file "pages-metadata.edn")
@@ -269,7 +191,7 @@
 ;; ====
 (defn with-block-uuid
   [pages]
-  (->> (distinct-by :block/name pages)
+  (->> (util/distinct-by :block/name pages)
        (map (fn [page]
               (if (:block/uuid page)
                 page
@@ -285,217 +207,13 @@
          (map (partial apply merge))
          (with-block-uuid))))
 
-;; from: frontend.util.property
-(defn properties-ast?
-  [block]
-  (and
-   (vector? block)
-   (contains? #{"Property_Drawer" "Properties"}
-              (first block))))
-
-;; from: frontend.text
-;; =====
-
-(def page-ref-re-without-nested #"\[\[([^\[\]]+)\]\]")
-
-(defn get-nested-page-name
-  [page-name]
-  (when-let [first-match (re-find page-ref-re-without-nested page-name)]
-    (second first-match)))
-
-(defn namespace-page?
-  [p]
-  (and (string? p)
-       (string/includes? p "/")
-       (not (string/starts-with? p "../"))
-       (not (string/starts-with? p "./"))
-       ;; TODO: Pull in util/url
-       #_(not (util/url? p))))
-
-(defonce non-parsing-properties
-  (atom #{"background-color" "background_color"}))
-
-;; TODO: Enable most of the property cases
-(defn parse-property
-  ([k v]
-   (parse-property :markdown k v))
-  ([_format k v]
-   (let [k (name k)
-         v (if (or (symbol? v) (keyword? v)) (name v) (str v))
-         v (string/trim v)]
-     (cond
-       ; (contains? (set/union
-       ;             #{"title" "filters"}
-       ;             (get (state/get-config) :ignored-page-references-keywords)) k)
-       ; v
-
-       (= v "true")
-       true
-       (= v "false")
-       false
-
-       ; (and (not= k "alias") (util/safe-re-find #"^\d+$" v))
-       ; (util/safe-parse-int v)
-
-       ; (util/wrapped-by-quotes? v) ; wrapped in ""
-       ; v
-
-       (contains? @non-parsing-properties (string/lower-case k))
-       v
-
-       ; (mldoc/link? format v)
-       ; v
-
-       #_:else
-       #_(split-page-refs-without-brackets v)))))
-
-;; from: frontend.format.block
-;; =====
-(defn heading-block?
-  [block]
-  (and
-   (vector? block)
-   (= "Heading" (first block))))
-
-(defn convert-page-if-journal
-  "Convert journal file name to user' custom date format"
-  [original-page-name]
-  (when original-page-name
-    (let [page-name (page-name-sanity-lc original-page-name)
-          ;; TODO: Enable date/* fns
-          day false #_(date/journal-title->int page-name)]
-     (if day
-       (let [original-page-name "" #_(date/int->journal-title day)]
-         [original-page-name (page-name-sanity-lc original-page-name) day])
-       [original-page-name page-name day]))))
-
-(defn with-parent-and-left
-  [page-id blocks]
-  (loop [blocks (map (fn [block] (assoc block :block/level-spaces (:block/level block))) blocks)
-         parents [{:page/id page-id     ; db id or a map {:block/name "xxx"}
-                   :block/level 0
-                   :block/level-spaces 0}]
-         _sibling nil
-         result []]
-    (if (empty? blocks)
-      (map #(dissoc % :block/level-spaces) result)
-      (let [[block & others] blocks
-            level-spaces (:block/level-spaces block)
-            {:block/keys [uuid level parent] :as last-parent} (last parents)
-            parent-spaces (:block/level-spaces last-parent)
-            [blocks parents sibling result]
-            (cond
-              (= level-spaces parent-spaces)        ; sibling
-              (let [block (assoc block
-                                 :block/parent parent
-                                 :block/left [:block/uuid uuid]
-                                 :block/level level)
-                    parents' (conj (vec (butlast parents)) block)
-                    result' (conj result block)]
-                [others parents' block result'])
-
-              (> level-spaces parent-spaces)         ; child
-              (let [parent (if uuid [:block/uuid uuid] (:page/id last-parent))
-                    block (cond->
-                            (assoc block
-                                  :block/parent parent
-                                  :block/left parent)
-                            ;; fix block levels with wrong order
-                            ;; For example:
-                            ;;   - a
-                            ;; - b
-                            ;; What if the input indentation is two spaces instead of 4 spaces
-                            (>= (- level-spaces parent-spaces) 1)
-                            (assoc :block/level (inc level)))
-                    parents' (conj parents block)
-                    result' (conj result block)]
-                [others parents' block result'])
-
-              (< level-spaces parent-spaces)
-              (cond
-                (some #(= (:block/level-spaces %) (:block/level-spaces block)) parents) ; outdent
-                (let [parents' (vec (filter (fn [p] (<= (:block/level-spaces p) level-spaces)) parents))
-                      left (last parents')
-                      blocks (cons (assoc (first blocks)
-                                          :block/level (dec level)
-                                          :block/left [:block/uuid (:block/uuid left)])
-                                   (rest blocks))]
-                  [blocks parents' left result])
-
-                :else
-                (let [[f r] (split-with (fn [p] (<= (:block/level-spaces p) level-spaces)) parents)
-                      left (first r)
-                      parent-id (if-let [block-id (:block/uuid (last f))]
-                                  [:block/uuid block-id]
-                                  page-id)
-                      block (cond->
-                              (assoc block
-                                     :block/parent parent-id
-                                     :block/left [:block/uuid (:block/uuid left)]
-                                     :block/level (:block/level left)
-                                     :block/level-spaces (:block/level-spaces left)))
-
-                      parents' (->> (concat f [block]) vec)
-                      result' (conj result block)]
-                  [others parents' block result'])))]
-        (recur blocks parents sibling result)))))
-
-(defn page-name->map
-  "Create a page's map structure given a original page name (string).
-   map as input is supported for legacy compatibility.
-   with-timestamp?: assign timestampes to the map structure.
-    Useful when creating new pages from references or namespaces,
-    as there's no chance to introduce timestamps via editing in page"
-  ([original-page-name with-id?]
-   (page-name->map original-page-name with-id? true))
-  ([original-page-name with-id? with-timestamp?]
-   (cond
-     (and original-page-name (string? original-page-name))
-     (let [original-page-name (remove-boundary-slashes original-page-name)
-           [original-page-name page-name journal-day] (convert-page-if-journal original-page-name)
-           namespace? (and (not (boolean (get-nested-page-name original-page-name)))
-                           (namespace-page? original-page-name))
-           ;; TODO: Pass db down to this fn
-           page-entity (some-> nil (d/entity [:block/name page-name]))]
-       (merge
-        {:block/name page-name
-         :block/original-name original-page-name}
-        (when with-id?
-          (if page-entity
-            {}
-            {:block/uuid (d/squuid)}))
-        (when namespace?
-          (let [namespace (first (split-last "/" original-page-name))]
-            (when-not (string/blank? namespace)
-              {:block/namespace {:block/name (page-name-sanity-lc namespace)}})))
-        (when (and with-timestamp? (not page-entity)) ;; Only assign timestamp on creating new entity
-          ;; TODO: add current time with cljs-core
-          (let [current-ms 0 #_(util/time-ms)]
-            {:block/created-at current-ms
-             :block/updated-at current-ms}))
-        (if journal-day
-          {:block/journal? true
-           :block/journal-day journal-day}
-          {:block/journal? false})))
-
-     (and (map? original-page-name) (:block/uuid original-page-name))
-     original-page-name
-
-     (and (map? original-page-name) with-id?)
-     (assoc original-page-name :block/uuid (d/squuid))
-
-     :else
-     nil)))
-
-;; from: frontend.handler.extract
-;; =====
 (defn get-page-name
   [file ast]
   ;; headline
   (let [ast (map first ast)]
     (if (string/includes? file "pages/contents.")
       "Contents"
-      (let [first-block (last (first (filter heading-block? ast)))
+      (let [first-block (last (first (filter block/heading-block? ast)))
             property-name (when (and (contains? #{"Properties" "Property_Drawer"} (ffirst ast))
                                      (not (string/blank? (:title (last (first ast))))))
                             (:title (last (first ast))))
@@ -504,8 +222,8 @@
                                     (string? title)
                                     title))
             file-name (when-let [file-name (last (string/split file #"/"))]
-                        (let [result (first (split-last "." file-name))]
-                          (if (mldoc-support? (string/lower-case (get-file-ext file)))
+                        (let [result (first (util/split-last "." file-name))]
+                          (if (mldoc-support? (string/lower-case (util/get-file-ext file)))
                             (string/replace result "." "/")
                             result)))]
         (or property-name
@@ -523,9 +241,9 @@
   [repo-url format ast properties file content]
   (try
     (let [page (get-page-name file ast)
-          [_original-page-name page-name _journal-day] (convert-page-if-journal page)
+          [_original-page-name page-name _journal-day] (block/convert-page-if-journal page)
           blocks (->> (extract-blocks ast content false format)
-                      (with-parent-and-left {:block/name page-name}))
+                      (block/with-parent-and-left {:block/name page-name}))
           ref-pages (atom #{})
           ref-tags (atom #{})
           blocks (map (fn [block]
@@ -545,13 +263,13 @@
           page-entity (let [alias (:alias properties)
                             alias (if (string? alias) [alias] alias)
                             aliases (and alias
-                                         (seq (remove #(or (= page-name (page-name-sanity-lc %))
+                                         (seq (remove #(or (= page-name (util/page-name-sanity-lc %))
                                                            (string/blank? %)) ;; disable blank alias
                                                       alias)))
                             aliases (->>
                                      (map
                                        (fn [alias]
-                                         (let [page-name (page-name-sanity-lc alias)
+                                         (let [page-name (util/page-name-sanity-lc alias)
                                                aliases (distinct
                                                         (conj
                                                          (remove #{alias} aliases)
@@ -559,7 +277,7 @@
                                                aliases (when (seq aliases)
                                                          (map
                                                            (fn [alias]
-                                                             {:block/name (page-name-sanity-lc alias)})
+                                                             {:block/name (util/page-name-sanity-lc alias)})
                                                            aliases))]
                                            (if (seq aliases)
                                              {:block/name page-name
@@ -568,10 +286,10 @@
                                        aliases)
                                      (remove nil?))]
                         (cond->
-                         (remove-nils
+                         (util/remove-nils
                             (assoc
-                             (page-name->map page false)
-                             :block/file {:file/path (path-normalize file)}))
+                             (block/page-name->map page false)
+                             :block/file {:file/path (util/path-normalize file)}))
                          (seq properties)
                          (assoc :block/properties properties)
 
@@ -583,14 +301,14 @@
                                                   tags (if (string? tags) [tags] tags)
                                                   tags (remove string/blank? tags)]
                                               (swap! ref-tags set/union (set tags))
-                                              (map (fn [tag] {:block/name (page-name-sanity-lc tag)
+                                              (map (fn [tag] {:block/name (util/page-name-sanity-lc tag)
                                                               :block/original-name tag})
                                                    tags)))))
           namespace-pages (let [page (:block/original-name page-entity)]
-                            (when (namespace-page? page)
-                              (->> (split-namespace-pages page)
+                            (when (text/namespace-page? page)
+                              (->> (util/split-namespace-pages page)
                                    (map (fn [page]
-                                          (-> (page-name->map page true)
+                                          (-> (block/page-name->map page true)
                                               (assoc :block/format format)))))))
           pages (->> (concat
                       [page-entity]
@@ -598,13 +316,13 @@
                       (map
                         (fn [page]
                           {:block/original-name page
-                           :block/name (page-name-sanity-lc page)})
+                           :block/name (util/page-name-sanity-lc page)})
                         @ref-tags)
                       namespace-pages)
                      ;; remove block references
                      (remove vector?)
                      (remove nil?))
-          pages (distinct-by :block/name pages)
+          pages (util/distinct-by :block/name pages)
           pages (remove nil? pages)
           pages (map (fn [page] (assoc page :block/uuid (d/squuid))) pages)
           blocks (->> (remove nil? blocks)
@@ -625,11 +343,11 @@
                                                          ))]
       (println "Parsing finished : " file)
       (let [first-block (ffirst ast)
-            properties (let [properties (and (properties-ast? first-block)
+            properties (let [properties (and (property/properties-ast? first-block)
                                              (->> (last first-block)
                                                   (map (fn [[x y]]
                                                          [x (if (string? y)
-                                                              (parse-property format x y)
+                                                              (text/parse-property format x y)
                                                               y)]))
                                                   (into {})
                                                   (walk/keywordize-keys)))]
@@ -661,7 +379,7 @@
 
                :else
                file)
-        file (path-normalize file)
+        file (util/path-normalize file)
         new? (nil? (d/entity @conn [:file/path file]))]
     (d/transact! conn [{:file/path file :file/content content}])
     (let [format (get-format file)

+ 6 - 36
src/main/frontend/format/mldoc_slim.cljs → src/main/logseq/graph_parser/mldoc.cljs

@@ -1,45 +1,16 @@
-(ns frontend.format.mldoc-slim
+(ns logseq.graph-parser.mldoc
   (:require [cljs-bean.core :as bean]
             [clojure.string :as string]
             ; [frontend.format.protocol :as protocol]
             [frontend.utf8 :as utf8]
-            ; [frontend.util :as util]
             [goog.object :as gobj]
+            [logseq.graph-parser.util :as util]
             ; [lambdaisland.glogi :as log]
             ; [medley.core :as medley]
             ["mldoc$default" :as mldoc :refer [Mldoc]]
             ; [linked.core :as linked]
             #_[frontend.config :as config]))
 
-;; from: frontend.util
-;; ====
-(defn json->clj
-  ([json-string]
-   (json->clj json-string false))
-  ([json-string kebab?]
-   (let [m (-> json-string
-               (js/JSON.parse)
-               (js->clj :keywordize-keys true))]
-     (if kebab?
-       m
-       #_(cske/transform-keys csk/->kebab-case-keyword m)
-       m))))
-
-(defn safe-subs
-  ([s start]
-   (let [c (count s)]
-     (safe-subs s start c)))
-  ([s start end]
-   (let [c (count s)]
-     (subs s (min c start) (min c end)))))
-
-(defn split-first [pattern s]
-  (when-let [first-index (string/index-of s pattern)]
-    [(subs s 0 first-index)
-     (subs s (+ first-index (count pattern)) (count s))]))
-
-;; Normal mldoc below
-;; ============
 (defonce parseJson (gobj/get Mldoc "parseJson"))
 (defonce parseInlineJson (gobj/get Mldoc "parseInlineJson"))
 (defonce parseOPML (gobj/get Mldoc "parseOPML"))
@@ -121,8 +92,8 @@
   (let [lines (string/split-lines s)
         [f & r] lines
         body (map (fn [line]
-                    (if (string/blank? (safe-subs line 0 level))
-                      (safe-subs line level)
+                    (if (string/blank? (util/safe-subs line 0 level))
+                      (util/safe-subs line level)
                       line))
                (if remove-first-line? lines r))
         content (if remove-first-line? body (cons f body))]
@@ -167,7 +138,7 @@
                    (->>
                     (map
                      (fn [[_ v]]
-                       (let [[k v] (split-first " " v)]
+                       (let [[k v] (util/split-first " " v)]
                          (mapv
                           string/trim
                           [k v])))
@@ -194,7 +165,6 @@
           properties (-> properties
                          (update :filetags (constantly filetags)))
           ; properties (medley/remove-kv (fn [_k v] (or (nil? v) (and (coll? v) (empty? v)))) properties)
-          ;; TODO: bring in medley?
           properties (into {} (remove (fn [[_k v]] (or (nil? v) (and (coll? v) (empty? v)))) properties))]
       (if (seq properties)
         (cons [["Properties" properties] nil] other-ast)
@@ -233,7 +203,7 @@
         []
         (-> content
             (parse-json config)
-            (json->clj)
+            (util/json->clj)
             (update-src-full-content content)
             (collect-page-properties parse-property)))
       (catch js/Error e

+ 48 - 0
src/main/logseq/graph_parser/property.cljs

@@ -0,0 +1,48 @@
+(ns logseq.graph-parser.property
+  "Property fns that only rely on clojurescript fns. Fns from frontend.util.property"
+  (:require [logseq.graph-parser.util :as util]
+            [clojure.string :as string]
+            [goog.string :as gstring]))
+
+(defn properties-ast?
+  "True when `block` is a vector whose first element is \"Properties\" or \"Property_Drawer\"; false otherwise."
+  [block]
+  (if (vector? block)
+    (let [node-type (first block)]
+      (contains? #{"Properties" "Property_Drawer"} node-type))
+    false))
+
+(defonce properties-start ":PROPERTIES:") ; exact line that ->new-properties treats as the start marker
+(defonce properties-end ":END:") ; exact line that ->new-properties treats as the end marker
+(defonce properties-end-pattern ; ":END:" + optional tab/CR/space + newline, or ":END:" + trailing whitespace at end of input
+  (re-pattern (gstring/format "%s[\t\r ]*\n|(%s\\s*$)" properties-end properties-end)))
+
+(defn contains-properties?
+  "Truthy when `content` includes `properties-start` and `properties-end-pattern` is found in it; nil for nil `content`."
+  [content]
+  (if content
+    (if (string/includes? content properties-start)
+      (util/safe-re-find properties-end-pattern content)
+      false)
+    nil))
+
+(defn ->new-properties
+  "New syntax: key:: value. Rewrites the lines between the `:PROPERTIES:` and `:END:` lines as `key:: value` and drops both marker lines."
+  [content]
+  (if-not (contains-properties? content)
+    content
+    (let [all-lines (string/split-lines content)
+          open-idx (.indexOf all-lines properties-start)
+          close-idx (.indexOf all-lines properties-end)
+          ;; ":key: value" -> "key:: value"; a line with no ":" after its first character is kept as-is
+          convert-line (fn [line]
+                         (let [[raw-key raw-value] (util/split-first ":" (subs line 1))]
+                           (if-not (and raw-key raw-value)
+                             line
+                             (let [dashed-key (string/replace raw-key "_" "-")
+                                   new-key (case (keyword (string/lower-case dashed-key))
+                                             (:id :custom_id :custom-id) "id"
+                                             :last-modified-at "updated-at"
+                                             dashed-key)]
+                               (str new-key ":: " (string/trim raw-value))))))]
+      ;; Both markers must be present as whole lines, with the start marker first
+      (if-not (and (>= open-idx 0) (> close-idx 0) (> close-idx open-idx))
+        content
+        (string/join "\n"
+                     (concat (subvec all-lines 0 open-idx)
+                             (map convert-line (subvec all-lines (inc open-idx) close-idx))
+                             (subvec all-lines (inc close-idx))))))))

+ 380 - 0
src/main/logseq/graph_parser/text.cljs

@@ -0,0 +1,380 @@
+(ns logseq.graph-parser.text
+  "Modified version of frontend.text"
+  (:require [logseq.graph-parser.util :as util]
+            ; ["/frontend/utils" :as utils]
+            [goog.string :as gstring]
+            [clojure.string :as string]
+            ; [frontend.format.mldoc :as mldoc]
+            [clojure.set :as set]))
+
+(def page-ref-re-0 #"\[\[(.*)\]\]") ; greedy: captures everything between the first "[[" and the last "]]"
+(def org-page-ref-re #"\[\[(file:.*)\]\[.+?\]\]") ; [[file:...][label]]; captures the "file:..." part
+(def markdown-page-ref-re #"\[(.*)\]\(file:.*\)") ; [label](file:...); captures the label
+
+; (defonce ^js node-path utils/nodePath)
+
+;; TODO: Load frontend/utils.js
+(defn get-file-basename
+  [path]
+  path ; currently returns `path` unchanged; the #_ form below is discarded by the reader and never runs
+  #_(when-not (string/blank? path)
+      (node-path.name path)))
+
+(defn get-page-name
+  "Extracts a page name from a markdown file link, an org file link or a [[page ref]]; false for non-strings, nil when nothing matches."
+  [s]
+  (if (string? s)
+    (or
+     ;; [label](file:...) -> trimmed label
+     (some-> (re-matches markdown-page-ref-re s) second string/trim)
+     ;; [[file:...][label]] -> basename of the path with "." turned into "/"
+     (some-> (re-matches org-page-ref-re s) second get-file-basename (string/replace "." "/"))
+     ;; [[name]] -> name
+     (second (re-matches page-ref-re-0 s)))
+    false))
+
+(defn page-ref?
+  "True when `s` is a string that starts with \"[[\" and ends with \"]]\"."
+  [s]
+  (if (string? s)
+    (and (string/starts-with? s "[[")
+         (string/ends-with? s "]]"))
+    false))
+
+;; Matches a "((id))" block reference and captures the uuid-shaped id (8-4-4-4-12 alphanumerics).
+;; The class is A-Z, not A-z: the A-z range would also admit [ \ ] ^ _ and backtick.
+(def block-ref-re #"\(\(([a-zA-Z0-9]{8}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{12})\)\)")
+
+(defn get-block-ref
+  "Returns the id captured by `block-ref-re` when the whole string `s` is a block ref; nil on no match, false for non-strings."
+  [s]
+  (if (string? s)
+    (let [[_ block-id] (re-matches block-ref-re s)]
+      block-id)
+    false))
+
+(defn block-ref?
+  "True when `get-block-ref` finds an id in `s`, false otherwise."
+  [s]
+  (if (get-block-ref s)
+    true
+    false))
+
+(defonce page-ref-re #"\[\[(.*?)\]\]") ; non-greedy: captures the text between "[[" and the first following "]]"
+
+(defonce page-ref-re-2 #"(\[\[.*?\]\])") ; same match as page-ref-re, but the capture includes the brackets
+
+(def page-ref-re-without-nested #"\[\[([^\[\]]+)\]\]") ; captured name cannot contain "[" or "]"
+
+(defonce between-re #"\(between ([^\)]+)\)") ; "(between ...)"; captures the text up to the first ")"
+
+(defn page-ref-un-brackets!
+  "Returns the page name `get-page-name` extracts from `s`, or `s` itself when it extracts nothing."
+  [s]
+  (if-let [page-name (get-page-name s)]
+    page-name
+    s))
+
+(defn block-ref-un-brackets!
+  "For a string `s`: strips the two leading and two trailing characters when `s` is a block ref, else returns `s`; nil for non-strings."
+  [s]
+  (when (string? s)
+    (cond-> s
+      (block-ref? s) (subs 2 (- (count s) 2)))))
+
+;; E.g "Foo Bar"
+(defn sep-by-comma
+  "Splits `s` on the separator class below, drops blank pieces and trims the rest; nil when `s` is falsy."
+  [s]
+  (when s
+    (for [piece (string/split s #"[\,|,]{1}")
+          :when (not (string/blank? piece))]
+      (string/trim piece))))
+
+(defn sep-by-hashtag
+  "Splits `s` on \"#\", drops blank pieces and trims the rest; nil when `s` is falsy."
+  [s]
+  (when s
+    (for [piece (string/split s #"#")
+          :when (not (string/blank? piece))]
+      (string/trim piece))))
+
+(defn- not-matched-nested-pages
+  "True when string `s` contains more \"[[\" than \"]]\"; false for non-strings."
+  [s]
+  (if (string? s)
+    (let [opening (count (re-seq #"\[\[" s))
+          closing (count (re-seq #"\]\]" s))]
+      (< closing opening))
+    false))
+
+(defn- ref-matched?
+  "True when `s` has at least one \"[[\" and exactly as many \"]]\" as \"[[\"."
+  [s]
+  (let [opening (count (re-seq #"\[\[" s))
+        closing (count (re-seq #"\]\]" s))]
+    (and (pos? opening)
+         (= opening closing))))
+
+(defn get-nested-page-name
+  "Returns the name captured by the first `page-ref-re-without-nested` match in `page-name`, or nil when there is none."
+  [page-name]
+  (some-> (re-find page-ref-re-without-nested page-name)
+          second))
+
+(defn- concat-nested-pages
+  [coll]
+  (first ; only the vector of completed pieces is returned; a fragment still pending at the end is dropped
+   (reduce (fn [[acc not-matched-s] s] ; state: [completed pieces, pending text whose "[[" are not yet closed]
+             (cond
+               (and not-matched-s (= s "]]")) ; a bare closer arrives while a fragment is pending
+               (let [s' (str not-matched-s s)]
+                 (if (ref-matched? s')
+                   [(conj acc s') nil] ; brackets now balance: emit the joined piece and clear the pending text
+                   [acc s'])) ; still unbalanced: keep accumulating
+
+               not-matched-s ; any other piece while pending is appended to the pending text
+               [acc (str not-matched-s s)]
+
+               (not-matched-nested-pages s) ; piece has more "[[" than "]]": it becomes the pending text
+               [acc s]
+
+               :else ; balanced piece: passed through unchanged
+               [(conj acc s) not-matched-s])) [[] nil] coll)))
+
+(defn- sep-by-quotes
+  [s]
+  (string/split s #"(\"[^\"]*\")")) ; split around double-quoted segments; NOTE(review): the capture group presumably keeps those segments in the result (JS split semantics) - confirm
+
+(def markdown-link #"\[([^\[]+)\](\(.*\))")
+(defn split-page-refs-without-brackets
+  "Parses string `s` into page/tag names: a quoted string is unquoted, a string containing a markdown link is returned as-is, a string with page refs, tags, commas or quotes becomes a set of names; anything else is returned unchanged."
+  ([s]
+   (split-page-refs-without-brackets s {}))
+  ([s {:keys [un-brackets?]
+       :or {un-brackets? true}}]
+   (cond
+     (and (string? s) (util/wrapped-by-quotes? s))
+     (util/unquote-string s)
+
+     (and (string? s) (re-find markdown-link s))
+     s
+
+     (and (string? s)
+            ;; Either a page ref, a tag or a comma separated collection
+            (or (util/safe-re-find page-ref-re s)
+                (util/safe-re-find #"[\,|,|#|\"]+" s)))
+     (let [result (->> (sep-by-quotes s)
+                       ;; Drop quoted segments, split the rest around [[...]] refs
+                       (mapcat
+                        (fn [s]
+                          (when-not (util/wrapped-by-quotes? (string/trim s))
+                            (string/split s page-ref-re-2))))
+                       ;; Separate a "]]," boundary into text, "]]" and remainder
+                       (mapcat (fn [s]
+                                 (cond
+                                   (util/wrapped-by-quotes? s)
+                                   nil
+
+                                   (string/includes? (string/trimr s) "]],")
+                                   (let [idx (string/index-of s "]],")]
+                                     [(subs s 0 idx)
+                                      "]]"
+                                      (subs s (+ idx 3))])
+
+                                   :else
+                                   [s])))
+                       (remove #(= % ""))
+                       ;; Peel a trailing "]]" off so nested refs can be re-joined below
+                       (mapcat (fn [s] (if (string/ends-with? s "]]")
+                                         [(subs s 0 (- (count s) 2))
+                                          "]]"]
+                                         [s])))
+                       concat-nested-pages
+                       (remove string/blank?)
+                       (mapcat (fn [s]
+                                 (cond
+                                   (util/wrapped-by-quotes? s)
+                                   nil
+
+                                   (page-ref? s)
+                                   [(if un-brackets? (page-ref-un-brackets! s) s)]
+
+                                   :else
+                                   (->> (sep-by-comma s)
+                                        (mapcat sep-by-hashtag)))))
+                       (distinct))]
+       (if (or (coll? result)
+               (and (string? result)
+                    (string/starts-with? result "#")))
+         ;; Call `coll?` on the value: the bare function object is always truthy
+         (let [result (if (coll? result) result [result])
+               result (map (fn [s] (string/replace s #"^#+" "")) result)]
+           (set result))
+         (first result)))
+
+     :else
+     s)))
+
+(defn extract-level-spaces
+  "Searches `text` via `util/safe-re-find` for a leading run of \"-\" plus an optional whitespace char; returns \"\" for blank text."
+  [text _format]
+  (if (string/blank? text)
+    ""
+    ;; TODO: Pass in config
+    (let [block-pattern "-" #_(config/get-block-pattern format)
+          level-re (re-pattern (gstring/format "^[%s]+\\s?" block-pattern))]
+      (util/safe-re-find level-re text))))
+
+(defn- remove-level-space-aux!
+  "Removes the leading run of `pattern` chars from `text`; `space?` requires whitespace after the run, `trim-left?` left-trims `text` first."
+  [text pattern space? trim-left?]
+  (let [template (if space? "^[%s]+\\s+" "^[%s]+\\s?")
+        level-re (re-pattern (gstring/format template pattern))
+        candidate (cond-> text trim-left? string/triml)]
+    (string/replace-first candidate level-re "")))
+
+(defn remove-level-spaces
+  "Strips the leading \"-\" bullet run from `text`; nil when `format` is nil, \"\" for blank text, markdown text starting with \"---\" is returned untouched."
+  ([text format]
+   (remove-level-spaces text format false true))
+  ([text format space?]
+   (remove-level-spaces text format space? true))
+  ([text format space? trim-left?]
+   (when format
+     (if (string/blank? text)
+       ""
+       (if (and (= "markdown" (name format))
+                (string/starts-with? text "---"))
+         text
+         ;; bullet pattern is hard-coded; was (config/get-block-pattern format)
+         (remove-level-space-aux! text "-" space? trim-left?))))))
+
+(defn build-data-value
+  "Renders `col` as a bracketed list of double-quoted items joined by \", \"."
+  [col]
+  (->> col
+       (map #(str "\"" % "\""))
+       (string/join ", ")
+       (gstring/format "[%s]")))
+
+(defn media-link?
+  "Returns the first truthy `util/safe-re-find` result of matching `s` against a \".<fmt>\" suffix pattern (optional ?query and #fragment) for each of `media-formats`."
+  [media-formats s]
+  (let [format-match (fn [fmt]
+                       (-> (str "(?i)\\." fmt "(?:\\?([^#]*))?(?:#(.*))?$")
+                           re-pattern
+                           (util/safe-re-find s)))]
+    (some format-match media-formats)))
+
+(defn namespace-page?
+  "True when `p` is a string containing \"/\" that does not start with \"../\" or \"./\" and is not a url per `util/url?`."
+  [p]
+  (if (string? p)
+    (and (string/includes? p "/")
+         (not (or (string/starts-with? p "../")
+                  (string/starts-with? p "./")
+                  (util/url? p))))
+    false))
+
+(defn add-timestamp
+  "Sets the `key` timestamp line of `content` to \"KEY: value\".
+  Every line whose lower-cased text starts with `key` is replaced by the new
+  line. If that changes nothing, the new line is inserted right after the
+  first (title) line. All lines are trimmed and re-joined with \"\\n\"."
+  [content key value]
+  (let [stamp (str (string/upper-case key) ": " value)
+        original-lines (string/split-lines content)
+        trimmed-originals (map string/trim original-lines)
+        replaced (for [line original-lines]
+                   (string/trim
+                    (if (string/starts-with? (string/lower-case line) key)
+                      stamp
+                      line)))
+        final-lines (if (= trimmed-originals replaced)
+                      ;; Nothing was replaced: insert the stamp after the title
+                      (let [[title & body] replaced]
+                        (list* title stamp body))
+                      replaced)]
+    (string/join "\n" final-lines)))
+
+(defn remove-timestamp
+  "Drops every line of `content` whose lower-cased text starts with `key` and
+  re-joins the remaining lines with \"\\n\"."
+  [content key]
+  (let [timestamp-line? (fn [line]
+                          (string/starts-with? (string/lower-case line) key))]
+    (->> (string/split-lines content)
+         (remove timestamp-line?)
+         (string/join "\n"))))
+
+(defn get-current-line-by-pos
+  "Returns the line of `s` that contains character offset `pos`, counting one
+  extra character per line for the separator; nil when `pos` lies beyond the
+  last line."
+  [s pos]
+  (loop [offset 0
+         remaining (string/split-lines s)]
+    (when-let [[line & more] (seq remaining)]
+      (let [line-end (+ offset (count line))]
+        (if (>= line-end pos)
+          line
+          ;; Skip over the line separator before measuring the next line
+          (recur (inc line-end) more))))))
+
+(defn get-string-all-indexes
+  "Get all indexes of `value` in the string `s`.
+  Matches are non-overlapping: the search resumes right after the end of the
+  previous match. Returns a vector of start indexes, which is empty when
+  `value` does not occur or when `value` is empty/nil."
+  [s value]
+  (if (empty? value)
+    ;; An empty needle matches at every position and advances the cursor by
+    ;; zero characters, so the loop below would never terminate.
+    []
+    (let [step (count value)]
+      (loop [acc []
+             from 0]
+        (if-let [idx (string/index-of s value from)]
+          (recur (conj acc idx) (+ idx step))
+          acc)))))
+
+(defn surround-by?
+  "`pos` must be surrounded by `before` and `end` in string `value`, e.g. ((|))
+  `before` may be :start and `end` may be :end, in which case the compared
+  slice extends to the start/end of `value` instead. Returns nil when the
+  slice would run past the end of `value`."
+  [value pos before end]
+  (let [from-start? (= :start before)
+        to-end? (= :end end)
+        start-pos (if from-start? 0 (- pos (count before)))
+        end-pos (if to-end? (count value) (+ pos (count end)))
+        expected (cond
+                   (and to-end? from-start?) ""
+                   to-end? before
+                   from-start? end
+                   :else (str before end))]
+    (when (>= (count value) end-pos)
+      (= expected (subs value start-pos end-pos)))))
+
+(defn wrapped-by?
+  "`pos` must be wrapped by `before` and `end` in string `value`, e.g. ((a|b))
+  True when, ordering all `before` matches, all `end` matches and `pos` by
+  index, `pos` appears directly between a `before` match and an `end` match."
+  [value pos before end]
+  (let [tag-with (fn [label idxs]
+                   (map (fn [i] [i label]) idxs))
+        markers (concat (tag-with :before (get-string-all-indexes value before))
+                        (tag-with :end (get-string-all-indexes value end))
+                        [[pos :between]])
+        labels (map second (sort-by first markers))
+        wanted [:before :between :end]]
+    (true?
+     ;; Slide a window over the labels, keeping the last two seen, until the
+     ;; window plus the current label equals the wanted sequence.
+     (reduce (fn [window label]
+               (let [extended (conj window label)]
+                 (if (= wanted extended)
+                   (reduced true)
+                   (vec (take-last 2 extended)))))
+             []
+             labels))))
+
+(defn get-graph-name-from-path
+  "Derives a graph name from the last two \"/\"-separated segments of `path`.
+  When the first of those two segments is \"0\" only the last segment is used;
+  otherwise both are joined with \"/\". The result is passed through
+  js/decodeURI. Returns nil when `path` is not a string."
+  [path]
+  (when (string? path)
+    (let [[parent :as tail] (take-last 2 (string/split path #"/"))
+          graph-name (if (= "0" parent)
+                       (last tail)
+                       (string/join "/" tail))]
+      (js/decodeURI graph-name))))
+
+;; Property keys whose values `parse-property` returns verbatim, without
+;; splitting them into page refs. `parse-property` lower-cases the key before
+;; looking it up here. Held in an atom so the set can be changed at runtime.
+(defonce non-parsing-properties
+  (atom #{"background-color" "background_color"}))
+
+(defn parse-property
+  "Parses the property value `v` for key `k`.
+  `v` is first coerced to a trimmed string. The result is, in order of
+  precedence: the string itself for the keys \"title\" and \"filters\"; a
+  boolean for \"true\"/\"false\"; an integer for all-digit values (except for
+  key \"alias\"); the string itself when wrapped in double quotes or when the
+  lower-cased key is in `non-parsing-properties`; otherwise the result of
+  `split-page-refs-without-brackets`. `format` defaults to :markdown and is
+  currently unused."
+  ([k v]
+   (parse-property :markdown k v))
+  ([format k v]
+   (let [k (name k)
+         v (string/trim (if (or (symbol? v) (keyword? v))
+                          (name v)
+                          (str v)))
+         ;; TODO: Pass in config
+         unparsed-keys (set/union
+                        #{"title" "filters"}
+                        #_(get (state/get-config) :ignored-page-references-keywords))]
+     (cond
+       (contains? unparsed-keys k)
+       v
+
+       (= v "true")
+       true
+
+       (= v "false")
+       false
+
+       (and (not= k "alias") (util/safe-re-find #"^\d+$" v))
+       (util/safe-parse-int v)
+
+       ;; wrapped in "" or explicitly excluded from parsing
+       (or (util/wrapped-by-quotes? v)
+           (contains? @non-parsing-properties (string/lower-case k)))
+       v
+
+       ;; TODO: Enable mldoc/link?
+       ; (mldoc/link? format v)
+       ; v
+
+       :else
+       (split-page-refs-without-brackets v)))))

+ 171 - 0
src/main/logseq/graph_parser/util.cljs

@@ -0,0 +1,171 @@
+(ns logseq.graph-parser.util
+  "General util fns that only rely on clojure core fns. Fns from frontend.util"
+  (:require [clojure.walk :as walk]
+            [clojure.string :as string]))
+
+;; Regex source (as a string) for a UUID written as 8-4-4-4-12 lower-case hex
+;; digits; the third group must start with 0-5 and the fourth with 0, 8, 9, a
+;; or b.
+(def uuid-pattern "[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[089ab][0-9a-f]{3}-[0-9a-f]{12}")
+;; Case-insensitive regex that matches only when the whole string is a UUID.
+(defonce exactly-uuid-pattern (re-pattern (str "(?i)^" uuid-pattern "$")))
+
+(defn safe-re-find
+  "Like `re-find`, but returns nil instead of throwing when `s` is not a
+  string."
+  [pattern s]
+  (if-not (string? s)
+    nil
+    (re-find pattern s)))
+
+(defn uuid-string?
+  "Returns the matched string (truthy) when `s` is a string that is exactly a
+  UUID per `exactly-uuid-pattern`, otherwise nil."
+  [s]
+  (when (string? s)
+    (re-find exactly-uuid-pattern s)))
+
+(defn remove-nils
+  "Removes every key-value pair whose value is nil from `nm`, a (possibly
+  nested) map."
+  [nm]
+  (let [drop-nil-vals (fn [el]
+                        (if-not (map? el)
+                          el
+                          (into {} (remove (fn [[_ v]] (nil? v))) el)))]
+    (walk/postwalk drop-nil-vals nm)))
+
+(defn path-normalize
+  "Normalizes the file path `s` to Unicode NFC form (for paths read from the
+  FS; not required for writing)."
+  [s]
+  (-> s
+      (.normalize "NFC")))
+
+(defn distinct-by
+  "Returns a vector of the items of `col` with duplicates removed, where two
+  items are duplicates when `(f item)` is equal. The first occurrence of each
+  key is kept and input order is preserved."
+  [f col]
+  (first
+   (reduce
+    ;; Track already-seen keys in a set so each item costs one lookup and one
+    ;; call to `f`, instead of rescanning every kept item.
+    (fn [[kept seen :as state] x]
+      (let [k (f x)]
+        (if (contains? seen k)
+          state
+          [(conj kept x) (conj seen k)])))
+    [[] #{}]
+    col)))
+
+(defn split-first
+  "Splits `s` around the first occurrence of the string `pattern`, returning
+  [before after]; nil when `pattern` does not occur in `s`."
+  [pattern s]
+  (when-let [idx (string/index-of s pattern)]
+    (let [after-idx (+ idx (count pattern))]
+      [(subs s 0 idx)
+       (subs s after-idx)])))
+
+(defn split-last
+  "Splits `s` around the last occurrence of the string `pattern`, returning
+  [before after]; nil when `pattern` does not occur in `s`."
+  [pattern s]
+  (when-let [idx (string/last-index-of s pattern)]
+    (let [after-idx (+ idx (count pattern))]
+      [(subs s 0 idx)
+       (subs s after-idx)])))
+
+(defn get-file-ext
+  "Returns the lower-cased text after the last \".\" of `file`. Returns false
+  when `file` is not a string or contains no \".\"."
+  [file]
+  (and (string? file)
+       (string/includes? file ".")
+       (some-> (string/split file #"\.")
+               last
+               string/lower-case)))
+
+(defn split-namespace-pages
+  "Expands a \"/\"-separated `title` into the vector of its cumulative
+  prefixes, e.g. \"a/b/c\" => [\"a\" \"a/b\" \"a/b/c\"]."
+  [title]
+  (let [[head & tail] (string/split title "/")]
+    (reduce (fn [prefixes part]
+              ;; Each entry is the previous prefix extended by one segment
+              (conj prefixes (str (peek prefixes) "/" part)))
+            [head]
+            tail)))
+
+(defn remove-boundary-slashes
+  "Removes at most one leading and one trailing \"/\" from `s`. Returns nil
+  when `s` is not a string."
+  [s]
+  (when (string? s)
+    (let [without-leading (cond-> s
+                            (string/starts-with? s "/") (subs 1))]
+      (cond-> without-leading
+        (string/ends-with? without-leading "/")
+        (subs 0 (dec (count without-leading)))))))
+
+;; Matches a run of one or more of the characters : \ * ? " < > |
+;; `page-name-sanity` replaces each such run with "_".
+(def windows-reserved-chars #"[:\\*\\?\"<>|]+")
+
+(defn page-name-sanity
+  "Sanitize the page-name for file name (strict), for file writing.
+  Strips one leading/trailing slash, replaces runs of Windows-reserved
+  characters and of the characters \\ # | % with \"_\", and normalizes to NFC.
+  When `replace-slash?` is truthy (default false) every remaining \"/\" is
+  replaced with \".\". Returns nil when `page-name` is nil or not a string."
+  ([page-name]
+   (page-name-sanity page-name false))
+  ([page-name replace-slash?]
+   (let [page (some-> page-name
+                      (remove-boundary-slashes)
+                      ;; Windows reserved path characters
+                      (string/replace windows-reserved-chars "_")
+                      ;; for android filesystem compatibility
+                      (string/replace #"[\\#|%]+" "_")
+                      (path-normalize))]
+     (if replace-slash?
+       ;; `page` is nil when the input was nil/non-string; guard so that
+       ;; string/replace is never called on nil.
+       (some-> page (string/replace #"/" "."))
+       page))))
+
+(defn page-name-sanity-lc
+  "Lower-cases `s` and then sanitizes it with `page-name-sanity` (mandatory
+  for :block/name)."
+  [s]
+  (-> s
+      string/lower-case
+      page-name-sanity))
+
+(defn zero-pad
+  "Returns `n` as a string, prefixed with \"0\" when `n` is less than 10."
+  [n]
+  (cond->> (str n)
+    (< n 10) (str "0")))
+
+(defn url?
+  "True when `s` is a string that the js/URL constructor accepts; false
+  otherwise."
+  [s]
+  (if (string? s)
+    (try
+      (js/URL. s)
+      true
+      (catch js/Error _e
+        false))
+    false))
+
+(defn parse-int
+  "Parses `x` with js/parseInt when it is a string; any other value is
+  returned unchanged."
+  [x]
+  (cond-> x
+    (string? x) js/parseInt))
+
+(defn safe-parse-int
+  "Like `parse-int`, but returns nil when the result is NaN."
+  [x]
+  (let [parsed (parse-int x)]
+    (when-not (js/isNaN parsed)
+      parsed)))
+
+(defn wrapped-by-quotes?
+  "True when `v` is a string of at least two characters that both starts and
+  ends with a double quote."
+  [v]
+  (and (string? v)
+       (<= 2 (count v))
+       (string/starts-with? v "\"")
+       (string/ends-with? v "\"")))
+
+(defn unquote-string
+  "Drops the first and last character of `v` and trims the remainder."
+  [v]
+  (-> v
+      (subs 1 (dec (count v)))
+      string/trim))
+
+(defn tag-valid?
+  "True when `tag-name` is a string containing no \"#\", space, tab, CR or LF;
+  false when it contains one; nil when it is not a string."
+  [tag-name]
+  (when (string? tag-name)
+    (nil? (re-find #"[# \t\r\n]+" tag-name))))
+
+;; TODO: Use medley instead
+(defn map-keys
+  "Returns a new map built from `m` in which every key `k` is replaced by
+  `(f k)`; values are kept as-is."
+  [f m]
+  (let [rekey (fn [acc k v]
+                (assoc acc (f k) v))]
+    (reduce-kv rekey {} m)))
+
+(defn safe-subs
+  "Like `subs`, but clamps `start` and `end` to the length of `s`. `end`
+  defaults to the length of `s`."
+  ([s start]
+   (safe-subs s start (count s)))
+  ([s start end]
+   (let [len (count s)
+         clamp (fn [i] (min len i))]
+     (subs s (clamp start) (clamp end)))))
+
+(defn json->clj
+  "Parses `json-string` with js/JSON.parse and converts the result to
+  ClojureScript data with keywordized keys.
+  NOTE: `kebab?` (default false) currently has no effect; the kebab-case key
+  transform is commented out, so both branches return the same value."
+  ([json-string]
+   (json->clj json-string false))
+  ([json-string kebab?]
+   (let [m (-> json-string
+               (js/JSON.parse)
+               (js->clj :keywordize-keys true))]
+     (if kebab?
+       m
+       ;; Disabled until the key-transform library is available here
+       #_(cske/transform-keys csk/->kebab-case-keyword m)
+       m))))