浏览代码

enhance: speed up import of doc files and make them scriptable

Refactored the largest part of the import to use promesa, which made it
nbb-compatible and made large imports like docs 10-25% faster. Also
updated the db-import script to use this work.
Gabriel Horner 1 年之前
父节点
当前提交
e3cc4bad36

+ 53 - 25
deps/graph-parser/script/db_import.cljs

@@ -3,43 +3,70 @@
    developing the import feature and for engineers who want to customize
    the import process"
   (:require [clojure.string :as string]
+            [clojure.edn :as edn]
             [datascript.core :as d]
             ["path" :as node-path]
             ["os" :as os]
             ["fs" :as fs]
+            ["fs/promises" :as fsp]
             [nbb.core :as nbb]
             [babashka.cli :as cli]
-            [logseq.common.config :as common-config]
             [logseq.graph-parser.exporter :as gp-exporter]
-            [logseq.tasks.db-graph.create-graph :as create-graph]))
+            [logseq.common.graph :as common-graph]
+            [logseq.common.config :as common-config]
+            [logseq.tasks.db-graph.create-graph :as create-graph]
+            [promesa.core :as p]))
+
+(defn- remove-hidden-files
+  "Runs `files` through common-config/remove-hidden-files, keyed on each file's
+   path relative to `dir`. Returns `files` unchanged when `config` has no
+   :hidden entries"
+  [dir config files]
+  (if-not (seq (:hidden config))
+    files
+    (let [;; temporarily tag each file with its path relative to the graph dir
+          tagged (map (fn [file]
+                        (assoc file ::rel-path (node-path/relative dir (:rpath file))))
+                      files)
+          visible (common-config/remove-hidden-files tagged config ::rel-path)]
+      ;; strip the temporary key again before handing the files back
+      (map (fn [file] (dissoc file ::rel-path)) visible))))
+
+(defn- build-graph-files
+  "Given a graph directory, returns a seq of {:rpath path} maps for the files
+   found by common-graph/get-files under the resolved directory, minus files
+   hidden by `config`. File contents are not read here; callers read them
+   later in preparation for parsing"
+  [dir* config]
+  (let [dir (node-path/resolve dir*)]
+    (->> (common-graph/get-files dir)
+         (mapv (fn [path] {:rpath path}))
+         (remove-hidden-files dir config))))
 
-(defn- setup-import-options
-  [db config user-options]
-  {:extract-options {:date-formatter (common-config/get-date-formatter config)
-                     :user-config config
-                     :filename-format (or (:file/name-format config) :legacy)}
-   :user-options user-options
-   :page-tags-uuid (:block/uuid (d/entity db [:block/name "pagetags"]))
-   :import-state (gp-exporter/new-import-state)
-   :macros (:macros config)})
+(defn- read-config
+  "Reads repo-specific config from logseq/config.edn under `dir`. Returns {}
+   when the file is missing or holds no EDN form"
+  [dir]
+  (let [config-file (node-path/join dir common-config/app-name "config.edn")]
+    (if (fs/existsSync config-file)
+      ;; edn/read-string yields nil for blank input; fall back to an empty
+      ;; config so callers always receive a map
+      (or (-> config-file fs/readFileSync str edn/read-string) {})
+      {})))
 
-(defn- import-file-graph-to-db [file-graph conn db-name]
-  ;; TODO: Read in repo config
-  (let [import-options (setup-import-options @conn
-                                             {:file/name-format :triple-lowbar}
-                                             {:graph-name db-name})
-        ;; TODO: Read files dir and port more from import
-        file file-graph
-        m {:file/path file
-           :file/content (str (fs/readFileSync file))}]
-    (gp-exporter/add-file-to-db-graph conn (:file/path m) (:file/content m) import-options)))
+(defn- import-file-graph-to-db
+  "Imports the doc files of the file graph at `file-graph-dir` into the db graph
+   behind `conn`. Returns the result of gp-exporter/import-from-doc-files!"
+  [file-graph-dir conn user-options]
+  (let [config (read-config file-graph-dir)
+        import-options (gp-exporter/setup-import-options
+                        @conn
+                        config
+                        user-options
+                        {:notify-user prn})
+        graph-dir (node-path/resolve file-graph-dir)
+        ;; Test the path relative to the graph root. Matching "logseq/" against
+        ;; the whole :rpath drops every file whenever a parent directory name
+        ;; ends in "logseq", e.g. a graph stored under ~/logseq/my-graph.
+        ;; Assumes :rpath is a path under graph-dir, as remove-hidden-files does
+        logseq-file? (fn [{:keys [rpath]}]
+                       (= "logseq"
+                          (first (string/split (node-path/relative graph-dir rpath) #"[/\\]"))))
+        ;; TODO: Remove logseq/ filter when higher-level import fn is available
+        files (remove logseq-file? (build-graph-files file-graph-dir config))]
+    ;; (prn :files (count files) files)
+    (gp-exporter/import-from-doc-files!
+     conn files #(p/let [s (fsp/readFile (:rpath %))] (str s)) import-options)))
 
 (def spec
   "Options spec"
   {:help {:alias :h
           :desc "Print help"}
    :verbose {:alias :v
-             :desc "Verbose mode"}})
+             :desc "Verbose mode"}
+   :tag-classes {:alias :t
+                 :coerce []
+                 :desc "List of tags to convert to classes"}
+   :property-classes {:alias :p
+                      :coerce []
+                      :desc "List of properties whose values convert to classes"}})
 
 (defn -main [args]
   (let [[file-graph db-graph-dir] args
@@ -55,9 +82,10 @@
                         [(node-path/join (os/homedir) "logseq" "graphs") db-graph-dir])
         file-graph' (node-path/join (or js/process.env.ORIGINAL_PWD ".") file-graph)
         conn (create-graph/init-conn dir db-name)]
-    (import-file-graph-to-db file-graph' conn db-name)
-    (when (:verbose options) (println "Transacted" (count (d/datoms @conn :eavt)) "datoms"))
-    (println "Created graph" (str db-name "!"))))
+    (p/do!
+     (import-file-graph-to-db file-graph' conn (merge options {:graph-name db-name}))
+     (when (:verbose options) (println "Transacted" (count (d/datoms @conn :eavt)) "datoms"))
+     (println "Created graph" (str db-name "!")))))
 
 (when (= nbb/*file* (:file (meta #'-main)))
   (-main *command-line-args*))

+ 59 - 8
deps/graph-parser/src/logseq/graph_parser/exporter.cljs

@@ -11,7 +11,8 @@
             [logseq.db.frontend.property :as db-property]
             [logseq.db.frontend.property.type :as db-property-type]
             [logseq.common.util.macro :as macro-util]
-            [logseq.db.sqlite.util :as sqlite-util]))
+            [logseq.db.sqlite.util :as sqlite-util]
+            [promesa.core :as p]))
 
 (defn- get-pid
   "Get a property's id (name or uuid) given its name. For db graphs"
@@ -159,7 +160,7 @@
 (defn- handle-changed-property
   "Handles converting a property value whose :type has changed. Returns the changed
    value or nil if the property is to be ignored"
-  [val prop prop-name->uuid properties-text-values property-changes ignored-properties]
+  [val prop prop-name->uuid properties-text-values ignored-properties {:keys [property-changes log-fn]}]
   (let [type-change (get-in property-changes [prop :type])]
     (cond
       ;; ignore :to as any property value gets stringified
@@ -170,15 +171,15 @@
       (set (map (comp prop-name->uuid common-util/page-name-sanity-lc) val))
       :else
       (do
-        (js/console.log :prop-change-ignored {:property prop :val val :change type-change})
+        (log-fn :prop-change-ignored {:property prop :val val :change type-change})
         (swap! ignored-properties conj {:property prop :value val :schema (get property-changes prop)})
         nil))))
 
-(defn- update-user-property-values [props prop-name->uuid properties-text-values property-changes ignored-properties]
+(defn- update-user-property-values [props prop-name->uuid properties-text-values ignored-properties {:keys [property-changes] :as options}]
   (->> props
        (keep (fn [[prop val]]
                (if (get-in property-changes [prop :type])
-                 (when-let [val' (handle-changed-property val prop prop-name->uuid properties-text-values property-changes ignored-properties)]
+                 (when-let [val' (handle-changed-property val prop prop-name->uuid properties-text-values ignored-properties options)]
                    [prop val'])
                  [prop
                   (cond
@@ -199,7 +200,7 @@
   "Updates block property names and values"
   [props db page-names-to-uuids
    {:block/keys [properties-text-values] :as block}
-   {:keys [whiteboard? property-changes import-state]}]
+   {:keys [whiteboard? import-state] :as options}]
   (let [prop-name->uuid (if whiteboard?
                           (fn prop-name->uuid [k]
                             (or (get-pid db k)
@@ -216,7 +217,7 @@
            db
            (:ignored-properties import-state)
            (select-keys block [:block/name :block/content]))
-          (merge (update-user-property-values user-properties prop-name->uuid properties-text-values property-changes (:ignored-properties import-state)))
+          (merge (update-user-property-values user-properties prop-name->uuid properties-text-values (:ignored-properties import-state) options))
           (update-keys prop-name->uuid)))))
 
 (defn- handle-page-properties
@@ -477,4 +478,54 @@
         tx (concat whiteboard-pages pages-index pages-tx block-ids blocks-tx)
         tx' (common-util/fast-remove-nils tx)
         result (d/transact! conn tx')]
-    result))
+    result))
+
+;; UI facing fns
+;; =============
+
+(defn setup-import-options
+  "Builds the import options map from `db`, the graph `config` and
+   `user-options`. :macros falls back to the config's :macros when not given,
+   and :notify-user is only included when one is given"
+  [db config user-options {:keys [macros notify-user]}]
+  (let [extract-options {:date-formatter (common-config/get-date-formatter config)
+                         :user-config config
+                         :filename-format (or (:file/name-format config) :legacy)}
+        base-options {:extract-options extract-options
+                      :user-options user-options
+                      :page-tags-uuid (:block/uuid (d/entity db [:block/name "pagetags"]))
+                      :import-state (new-import-state)
+                      :macros (or macros (:macros config))}]
+    (if notify-user
+      (assoc base-options :notify-user notify-user)
+      base-options)))
+
+(defn- import-doc-file
+  "Reads one doc file with `<read-file` and imports it with :import-file, which
+   defaults to add-file-to-db-graph. Returns a promise of the
+   {:file/path :file/content} map, or of notify-user's result when reading or
+   importing throws"
+  [{:keys [rpath idx] :as file} conn <read-file
+   {:keys [notify-user set-ui-state import-file]
+    ;; notify-user is defaulted here as well so the error handler below never
+    ;; calls nil when the options map carries no :notify-user
+    :or {notify-user prn
+         set-ui-state (constantly nil)
+         import-file (fn import-file [conn m opts]
+                       (add-file-to-db-graph conn (:file/path m) (:file/content m) opts))}
+    :as import-options}]
+  ;; (prn :import-doc-file rpath idx)
+  (-> (p/let [_ (set-ui-state [:graph/importing-state :current-idx] (inc idx))
+              _ (set-ui-state [:graph/importing-state :current-page] rpath)
+              content (<read-file file)
+              m {:file/path rpath :file/content content}]
+        ;; the ui/import hooks are consumed here and not passed further down
+        (import-file conn m (dissoc import-options :set-ui-state :import-file))
+        ;; returning val results in smoother ui updates
+        m)
+      (p/catch (fn [error]
+                 (notify-user {:msg (str "Import failed on " (pr-str rpath) " with error:\n" error)
+                               :level :error
+                               :ex-data {:path rpath :error error}})))))
+
+(defn import-from-doc-files!
+  "Imports `*doc-files` one at a time, in order, reading each with `<read-file`.
+   Reports the total through :set-ui-state and errors through :notify-user
+   (defaults to prn). Returns a promise that resolves to nil once every file
+   has been attempted; an empty `*doc-files` imports nothing"
+  [conn *doc-files <read-file {:keys [notify-user set-ui-state]
+                                      :or {set-ui-state (constantly nil) notify-user prn}
+                                      :as import-options}]
+  (set-ui-state [:graph/importing-state :total] (count *doc-files))
+  (let [doc-files (vec (map-indexed (fn [idx file] (assoc file :idx idx)) *doc-files))
+        ;; hand the defaulted notify-user down so per-file errors can be reported
+        import-options' (assoc import-options :notify-user notify-user)]
+    ;; The previous file's promise travels as a loop binding, so p/loop waits
+    ;; for it before the next iteration. Starting from nil means an empty
+    ;; vector never reaches import-doc-file
+    (-> (p/loop [_file-map nil
+                 i 0]
+          (when (< i (count doc-files))
+            (p/recur (import-doc-file (nth doc-files i) conn <read-file import-options')
+                     (inc i))))
+        (p/catch (fn [e]
+                   (notify-user {:msg (str "Import has unexpected error:\n" e)
+                                 :level :error}))))))

+ 24 - 55
src/main/frontend/components/imports.cljs

@@ -33,12 +33,10 @@
             [logseq.db :as ldb]
             [logseq.graph-parser.exporter :as gp-exporter]
             [logseq.outliner.core :as outliner-core]
-            [medley.core :as medley]
             [promesa.core :as p]
             [rum.core :as rum]
             [logseq.common.config :as common-config]
             [lambdaisland.glogi :as log]
-            [frontend.handler.db-based.property.util :as db-pu]
             [logseq.db.frontend.validate :as db-validate]))
 
 ;; Can't name this component as `frontend.components.import` since shadow-cljs
@@ -169,52 +167,6 @@
       (ui/button "Submit"
                  {:on-click on-submit})]]))
 
-(defn- import-from-doc-files!
-  [db-conn repo config doc-files import-state user-options]
-  (let [imported-chan (async/promise-chan)
-        page-tags-uuid (db-pu/get-built-in-property-uuid repo :pagetags)]
-    (try
-      (let [docs-chan (async/to-chan! (medley/indexed doc-files))]
-        (state/set-state! [:graph/importing-state :total] (count doc-files))
-        (async/go-loop []
-          (if-let [[i {:keys [rpath] :as file}] (async/<! docs-chan)]
-            (let [extract-options {:date-formatter (common-config/get-date-formatter config)
-                                   :user-config config
-                                   :filename-format (or (:file/name-format config) :legacy)
-                                   :block-pattern (common-config/get-block-pattern (common-util/get-format rpath))}]
-              (state/set-state! [:graph/importing-state :current-idx] (inc i))
-              (state/set-state! [:graph/importing-state :current-page] rpath)
-              (async/<! (async/timeout 10))
-              (async/<! (p->c (-> (.text (:file-object file))
-                                  (p/then (fn [content]
-                                            (prn :import-file rpath i)
-                                            {:file/path rpath
-                                             :file/content content}))
-                                  (p/then (fn [m]
-                                            ;; Write to frontend first as writing to worker first is poor ux with slow streaming changes
-                                            (let [tx-report
-                                                  (gp-exporter/add-file-to-db-graph
-                                                   db-conn
-                                                   (:file/path m)
-                                                   (:file/content m)
-                                                   {:extract-options extract-options
-                                                    :user-options user-options
-                                                    :page-tags-uuid page-tags-uuid
-                                                    :import-state import-state
-                                                    :macros (state/get-macros)
-                                                    :notify-user #(notification/show! (:msg %) :warning false)})]
-                                              (db-browser/transact! @db-browser/*worker repo (:tx-data tx-report) (:tx-meta tx-report)))
-                                            m))
-                                  (p/catch (fn [error]
-                                             (notification/show! (str "Import failed on " (pr-str rpath) " with error:\n" error)
-                                                                 :error)
-                                             (log/error :import-error {:path rpath :error error}))))))
-              (recur))
-            (async/offer! imported-chan true))))
-      (catch :default e
-        (notification/show! (str "Error happens when importing:\n" e) :error)
-        (async/offer! imported-chan true)))))
-
 (defn- import-from-asset-files!
   [asset-files]
   (let [ch (async/to-chan! asset-files)
@@ -365,6 +317,7 @@
    :pages (count (filter :block/name entities))
    :blocks (count (filter :block/content entities))
    :classes (count (filter #(contains? (:block/type %) "class") entities))
+   :objects (count (filter #(seq (:block/tags %)) entities))
    :properties (count (filter #(contains? (:block/type %) "property") entities))
    :property-values (count (mapcat :block/properties entities))})
 
@@ -404,14 +357,12 @@
                             :warning false))
       (log/info :import-valid {:msg "Valid import!"
                                :counts (assoc (counts-from-entities entities) :datoms datom-count)}))))
-
 (defn- import-file-graph
   [*files {:keys [graph-name tag-classes property-classes]} config-file]
   (state/set-state! :graph/importing :file-graph)
   (state/set-state! [:graph/importing-state :current-page] (str graph-name " Assets"))
   (async/go
     (let [start-time (t/now)
-          import-state (gp-exporter/new-import-state)
           _ (async/<! (p->c (repo-handler/new-db! graph-name {:file-graph-import? true})))
           repo (state/get-current-repo)
           db-conn (db/get-db repo false)
@@ -421,17 +372,35 @@
           doc-files (->> files
                          (remove logseq-file?)
                          (filter #(contains? #{"md" "org" "markdown" "edn"} (path/file-ext (:rpath %)))))
-          asset-files (filter #(string/starts-with? (:rpath %) "assets/") files)]
+          asset-files (filter #(string/starts-with? (:rpath %) "assets/") files)
+          import-options (merge
+                          (gp-exporter/setup-import-options
+                           @db-conn
+                           config
+                           {:tag-classes (set (string/split tag-classes #",\s*"))
+                            :property-classes (set (string/split property-classes #",\s*"))}
+                           {:macros (state/get-macros)
+                            :notify-user #(if (= :error (:level %))
+                                            (do
+                                              (notification/show! (:msg %) :error)
+                                              (when (:ex-data %)
+                                                (log/error :import-error (:ex-data %))))
+                                            (notification/show! (:msg %) :warning false))})
+                          {:set-ui-state state/set-state!
+                           ;; Write to frontend first as writing to worker first is poor ux with slow streaming changes
+                           :import-file (fn import-file [conn m opts]
+                                          (let [tx-report
+                                                  (gp-exporter/add-file-to-db-graph conn (:file/path m) (:file/content m) opts)]
+                                              (db-browser/transact! @db-browser/*worker repo (:tx-data tx-report) (:tx-meta tx-report))))})
+          <read-file (fn [file] (.text (:file-object file)))]
       (async/<! (p->c (import-logseq-files (filter logseq-file? files))))
       (async/<! (import-from-asset-files! asset-files))
-      (async/<! (import-from-doc-files! db-conn repo config doc-files import-state
-                                        {:tag-classes (set (string/split tag-classes #",\s*"))
-                                         :property-classes (set (string/split property-classes #",\s*")) }))
+      (async/<! (p->c (gp-exporter/import-from-doc-files! db-conn doc-files <read-file import-options)))
       (async/<! (p->c (import-favorites-from-config-edn! db-conn repo config-file)))
       (log/info :import-file-graph {:msg (str "Import finished in " (/ (t/in-millis (t/interval start-time (t/now))) 1000) " seconds")})
       (state/set-state! :graph/importing nil)
       (state/set-state! :graph/importing-state nil)
-      (validate-imported-data @db-conn import-state files)
+      (validate-imported-data @db-conn (:import-state import-options) files)
       (finished-cb))))
 
 (defn import-file-to-db-handler