浏览代码

fix(import): remove asset byte-array usage to prevent OOM

This commit also combine read-asset and copy-asset to read-and-copy-asset
Tienson Qin 3 周之前
父节点
当前提交
971cec54bc

+ 25 - 22
deps/graph-parser/script/db_import.cljs

@@ -49,26 +49,31 @@
   (p/let [s (fsp/readFile (:path file))]
     (str s)))
 
-(defn- <read-asset-file [file assets]
-  (p/let [buffer (fs/readFileSync (:path file))
-          checksum (db-asset/<get-file-array-buffer-checksum buffer)]
-    (swap! assets assoc
-           (gp-exporter/asset-path->name (:path file))
-           {:size (.-length buffer)
-            :checksum checksum
-            :type (db-asset/asset-path->type (:path file))
-            :path (:path file)})
-    buffer))
+(defn- exceed-limit-size?
+  "Asset size no more than 100M"
+  [^js buffer]
+  (> (.-length buffer) (* 100 1024 1024)))
 
-(defn- <copy-asset-file [asset-m db-graph-dir]
-  (p/let [parent-dir (node-path/join db-graph-dir common-config/local-assets-dir)
-          _ (fsp/mkdir parent-dir #js {:recursive true})]
-    (if (:block/uuid asset-m)
-      (fsp/copyFile (:path asset-m) (node-path/join parent-dir (str (:block/uuid asset-m) "." (:type asset-m))))
-      (when-not (:pdf-annotation? asset-m)
-        (println "[INFO]" "Copied asset" (pr-str (node-path/basename (:path asset-m)))
-                 "by its name since it was unused.")
-        (fsp/copyFile (:path asset-m) (node-path/join parent-dir (node-path/basename (:path asset-m))))))))
+(defn- <read-and-copy-asset [db-graph-dir file assets buffer-handler]
+  (p/let [buffer (fs/readFileSync (:path file))
+          checksum (db-asset/<get-file-array-buffer-checksum buffer)
+          asset-id (d/squuid)
+          asset-name (gp-exporter/asset-path->name (:path file))
+          asset-type (db-asset/asset-path->type (:path file))]
+    (if (exceed-limit-size? buffer)
+      (js/console.error (str "Asset size shouldn't be larger than 100M, path: " (:path file)))
+      (p/let [parent-dir (node-path/join db-graph-dir common-config/local-assets-dir)
+              {:keys [with-edn-content pdf-annotation?]} (buffer-handler buffer)]
+        (fsp/mkdir parent-dir #js {:recursive true})
+        (swap! assets assoc asset-name
+               (with-edn-content
+                 {:size (.-length buffer)
+                  :type asset-type
+                  :path (:path file)
+                  :checksum checksum
+                  :asset-id asset-id}))
+        (when-not pdf-annotation?
+          (fsp/copyFile (:path file) (node-path/join parent-dir (str asset-id "." asset-type))))))))
 
 (defn- notify-user [{:keys [continue debug]} m]
   (println (:msg m))
@@ -119,9 +124,7 @@
         options (merge options
                        (default-export-options options)
                         ;; asset file options
-                       {:<copy-asset (fn copy-asset [file]
-                                       (<copy-asset-file file db-graph-dir))
-                        :<read-asset <read-asset-file})]
+                       {:<read-and-copy-asset #(<read-and-copy-asset db-graph-dir %1 %2 %3)})]
     (p/with-redefs [d/transact! dev-transact!]
       (gp-exporter/export-file-graph conn conn config-file *files options))))
 

+ 54 - 57
deps/graph-parser/src/logseq/graph_parser/exporter.cljs

@@ -1120,8 +1120,7 @@
 
 (defn- build-new-asset [asset-data]
   (merge (sqlite-util/block-with-timestamps
-          {:block/uuid (d/squuid)
-           :block/order (db-order/gen-key)
+          {:block/order (db-order/gen-key)
            :block/page :logseq.class/Asset
            :block/parent :logseq.class/Asset})
          {:block/tags [:logseq.class/Asset]
@@ -1129,16 +1128,25 @@
           :logseq.property.asset/checksum (:checksum asset-data)
           :logseq.property.asset/size (:size asset-data)}))
 
+(defn- get-asset-block-id
+  [assets path]
+  (or (get-in @assets [path :block/uuid])
+      (get-in @assets [path :asset-id])))
+
 (defn- build-annotation-images
   "Builds tx for annotation images and provides a map for mapping image asset names
    to their new uuids"
   [parent-asset-path assets]
   (let [image-dir (string/replace-first parent-asset-path #"(?i)\.pdf$" "")
         image-paths (filter #(= image-dir (node-path/dirname %)) (keys @assets))
-        txs (mapv #(let [new-asset (merge (build-new-asset (get @assets %))
-                                          {:block/title "pdf area highlight"})]
-                     (swap! assets assoc-in [% :block/uuid] (:block/uuid new-asset))
-                     new-asset)
+        txs (keep #(let [asset-id (get-asset-block-id assets %)]
+                     (if-not asset-id
+                       (js/console.error (str "No asset-id for " %))
+                       (let [new-asset (merge (build-new-asset (get @assets %))
+                                              {:block/title "pdf area highlight"
+                                               :block/uuid asset-id})]
+                         (swap! assets assoc-in [% :block/uuid] asset-id)
+                         new-asset)))
                   image-paths)]
     {:txs txs
      :image-asset-name-to-uuids
@@ -1163,8 +1171,6 @@
         asset-md-name (str "hls__" (safe-sanitize-file-name
                                     (node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".md"))))]
     (when-let [asset-edn-map (get @assets asset-edn-path)]
-      ;; Mark edn asset so it isn't treated like a normal asset later
-      (swap! assets assoc-in [asset-edn-path :pdf-annotation?] true)
       (let [{:keys [txs image-asset-name-to-uuids]} (build-annotation-images parent-asset-path assets)]
         (concat txs
                 (build-pdf-annotations-tx* asset-edn-map (get @pdf-annotation-pages asset-md-name) parent-asset image-asset-name-to-uuids opts))))))
@@ -1178,10 +1184,17 @@
            (fn [asset-link]
              (let [asset-name (-> asset-link second :url second asset-path->name)]
                (if-let [asset-data (and asset-name (get @assets asset-name))]
-                 (if (:block/uuid asset-data)
+                 (cond
+                   (:block/uuid asset-data)
                    {:asset-name-uuid [asset-name (:block/uuid asset-data)]}
+
+                   (nil? (get-asset-block-id assets asset-name))
+                   (js/console.error (str "No asset-id for " asset-name))
+
+                   :else
                    (let [new-asset (merge (build-new-asset asset-data)
-                                          {:block/title (db-asset/asset-name->title (node-path/basename asset-name))}
+                                          {:block/title (db-asset/asset-name->title (node-path/basename asset-name))
+                                           :block/uuid (get-asset-block-id assets asset-name)}
                                           (when-let [metadata (not-empty (common-util/safe-read-map-string (:metadata (second asset-link))))]
                                             {:logseq.property.asset/resize-metadata metadata}))
                          pdf-annotations-tx (when (= "pdf" (path/file-ext asset-name))
@@ -1949,49 +1962,36 @@
                                :level :error
                                :ex-data {:error e}})))))
 
-(defn- read-asset-files
-  "Reads files under assets/"
-  [*asset-files <read-asset-file {:keys [notify-user set-ui-state assets]
-                                  :or {set-ui-state (constantly nil)}}]
-  (assert <read-asset-file "read-asset-file fn required")
+(defn- read-and-copy-asset-files
+  "Reads and copies files under assets/"
+  [*asset-files <read-and-copy-asset-file {:keys [notify-user set-ui-state assets]
+                                           :or {set-ui-state (constantly nil)}}]
+  (assert <read-and-copy-asset-file "read-and-copy-asset-file fn required")
   (let [asset-files (mapv #(assoc %1 :idx %2)
                           ;; Sort files to ensure reproducible import behavior
                           (sort-by :path *asset-files)
                           (range 0 (count *asset-files)))
-        read-asset (fn read-asset [{:keys [path] :as file}]
-                     (-> (p/let [byte-array (<read-asset-file file assets)]
-                           (when (= "edn" (path/file-ext (:path file)))
-                             (swap! assets assoc-in
-                                    [(asset-path->name path) :edn-content]
-                                    (common-util/safe-read-map-string (utf8/decode byte-array)))))
-                         (p/catch
-                          (fn [error]
-                            (notify-user {:msg (str "Import failed to read " (pr-str path) " with error:\n" (.-message error))
-                                          :level :error
-                                          :ex-data {:path path :error error}})))))]
+        read-and-copy-asset (fn read-and-copy-asset [{:keys [path] :as file}]
+                              (-> (<read-and-copy-asset-file
+                                   file assets
+                                   (fn [buffer]
+                                     (let [edn? (= "edn" (path/file-ext path))
+                                           edn-content (when edn? (common-util/safe-read-map-string (utf8/decode buffer)))
+                                           pdf-annotation? (some #{:highlights} (keys edn-content))
+                                           with-edn-content (fn [m]
+                                                              (cond-> m
+                                                                edn?
+                                                                (assoc :edn-content edn-content)))]
+                                       {:with-edn-content with-edn-content
+                                        :pdf-annotation? pdf-annotation?})))
+                                  (p/catch
+                                   (fn [error]
+                                     (notify-user {:msg (str "Import failed to read and copy " (pr-str path) " with error:\n" (.-message error))
+                                                   :level :error
+                                                   :ex-data {:path path :error error}})))))]
     (when (seq asset-files)
-      (set-ui-state [:graph/importing-state :current-page] "Read asset files")
-      (<safe-async-loop read-asset asset-files notify-user))))
-
-(defn- copy-asset-files
-  "Copy files under assets/"
-  [asset-maps* <copy-asset-file {:keys [notify-user set-ui-state]
-                                 :or {set-ui-state (constantly nil)}}]
-  (assert <copy-asset-file "copy-asset-file fn required")
-  (let [asset-maps (mapv #(assoc %1 :idx %2)
-                          ;; Sort files to ensure reproducible import behavior
-                         (sort-by :path asset-maps*)
-                         (range 0 (count asset-maps*)))
-        copy-asset (fn copy-asset [{:keys [path] :as asset-m}]
-                     (p/catch
-                      (<copy-asset-file asset-m)
-                      (fn [error]
-                        (notify-user {:msg (str "Import failed to copy " (pr-str path) " with error:\n" (.-message error))
-                                      :level :error
-                                      :ex-data {:path path :error error}}))))]
-    (when (seq asset-maps)
-      (set-ui-state [:graph/importing-state :current-page] "Copy asset files")
-      (<safe-async-loop copy-asset asset-maps notify-user))))
+      (set-ui-state [:graph/importing-state :current-page] "Read and copy asset files")
+      (<safe-async-loop read-and-copy-asset asset-files notify-user))))
 
 (defn- insert-favorites
   "Inserts favorited pages as uuids into a new favorite page"
@@ -2071,11 +2071,10 @@
    * :user-options - map of user specific options. See add-file-to-db-graph for more
    * :<save-config-file - fn which saves a config file
    * :<save-logseq-file - fn which saves a logseq file
-   * :<copy-asset - fn which copies asset file
-   * :<read-asset - fn which reads asset file
+   * :<read-and-copy-asset - fn which reads and copies asset file
 
    Note: See export-doc-files for additional options that are only for it"
-  [repo-or-conn conn config-file *files {:keys [<read-file <copy-asset <read-asset rpath-key log-fn]
+  [repo-or-conn conn config-file *files {:keys [<read-file <read-and-copy-asset rpath-key log-fn]
                                          :or {rpath-key :path log-fn println}
                                          :as options}]
   (reset! gp-block/*export-to-db-graph? true)
@@ -2099,13 +2098,11 @@
                              (-> (select-keys options [:notify-user :<save-logseq-file])
                                  (set/rename-keys {:<save-logseq-file :<save-file})))
         ;; Assets are read first as doc-files need data from them to make Asset blocks.
-        ;; Assets are copied after doc-files as they need block/uuid's from them to name assets
-        (read-asset-files asset-files <read-asset (merge (select-keys options [:notify-user :set-ui-state])
-                                                         {:assets (get-in doc-options [:import-state :assets])}))
+        (read-and-copy-asset-files asset-files
+                                   <read-and-copy-asset
+                                   (merge (select-keys options [:notify-user :set-ui-state])
+                                          {:assets (get-in doc-options [:import-state :assets])}))
         (export-doc-files conn doc-files <read-file doc-options)
-        (copy-asset-files (vals @(get-in doc-options [:import-state :assets]))
-                          <copy-asset
-                          (select-keys options [:notify-user :set-ui-state]))
         (export-favorites-from-config-edn conn repo-or-conn config {})
         (export-class-properties conn repo-or-conn)
         (move-top-parent-pages-to-library conn repo-or-conn)

+ 18 - 17
deps/graph-parser/test/logseq/graph_parser/exporter_test.cljs

@@ -1,7 +1,7 @@
 (ns ^:node-only logseq.graph-parser.exporter-test
   (:require ["fs" :as fs]
             ["path" :as node-path]
-            [cljs.test :refer [testing is are deftest]]
+            [cljs.test :refer [are deftest is testing]]
             [clojure.set :as set]
             [clojure.string :as string]
             [datascript.core :as d]
@@ -94,16 +94,22 @@
    ;; TODO: Add actual default
    :default-config {}})
 
-;; Copied from db-import
-(defn- <read-asset-file [file assets]
+;; tweaked from db-import
+(defn- <read-and-copy-asset [file assets buffer-handler *asset-ids]
   (p/let [buffer (fs/readFileSync (:path file))
-          checksum (db-asset/<get-file-array-buffer-checksum buffer)]
-    (swap! assets assoc
-           (gp-exporter/asset-path->name (:path file))
-           {:size (.-length buffer)
-            :checksum checksum
-            :type (db-asset/asset-path->type (:path file))
-            :path (:path file)})
+          checksum (db-asset/<get-file-array-buffer-checksum buffer)
+          asset-id (d/squuid)
+          asset-name (gp-exporter/asset-path->name (:path file))
+          asset-type (db-asset/asset-path->type (:path file))
+          {:keys [with-edn-content _pdf-annotation?]} (buffer-handler buffer)]
+    (swap! *asset-ids conj asset-id)
+    (swap! assets assoc asset-name
+           (with-edn-content
+             {:size (.-length buffer)
+              :type asset-type
+              :path (:path file)
+              :checksum checksum
+              :asset-id asset-id}))
     buffer))
 
 ;; Copied from db-import script and tweaked for an in-memory import
@@ -118,13 +124,8 @@
         options' (merge default-export-options
                         {:user-options (merge {:convert-all-tags? false} (dissoc options :assets :verbose))
                         ;; asset file options
-                         :<read-asset <read-asset-file
-                         :<copy-asset (fn copy-asset [m]
-                                        (if (:block/uuid m)
-                                          (swap! assets conj m)
-                                          (when-not (:pdf-annotation? m)
-                                            (println "[INFO]" "Asset" (pr-str (node-path/basename (:path m)))
-                                                     "does not have a :block/uuid"))))}
+                         :<read-and-copy-asset (fn [file *assets buffer-handler]
+                                                 (<read-and-copy-asset file *assets buffer-handler assets))}
                         (select-keys options [:verbose]))]
     (gp-exporter/export-file-graph conn conn config-file *files options')))
 

+ 26 - 31
src/main/frontend/components/imports.cljs

@@ -1,9 +1,9 @@
 (ns frontend.components.imports
   "Import data into Logseq."
-  (:require ["path" :as node-path]
-            [cljs-time.core :as t]
+  (:require [cljs-time.core :as t]
             [cljs.pprint :as pprint]
             [clojure.string :as string]
+            [datascript.core :as d]
             [frontend.components.onboarding.setups :as setups]
             [frontend.components.repo :as repo]
             [frontend.components.svg :as svg]
@@ -11,6 +11,7 @@
             [frontend.context.i18n :refer [t]]
             [frontend.db :as db]
             [frontend.fs :as fs]
+            [frontend.handler.assets :as assets-handler]
             [frontend.handler.db-based.editor :as db-editor-handler]
             [frontend.handler.db-based.import :as db-import-handler]
             [frontend.handler.file-based.import :as file-import-handler]
@@ -348,33 +349,28 @@
         (log/error :import-error ex-data)))
     (notification/show! msg :warning false)))
 
-(defn- read-asset [file assets]
-  (-> (.arrayBuffer (:file-object file))
-      (p/then (fn [buffer]
-                (p/let [checksum (db-asset/<get-file-array-buffer-checksum buffer)
-                        byte-array (js/Uint8Array. buffer)]
-                  (swap! assets assoc
-                         (gp-exporter/asset-path->name (:path file))
-                         {:size (.-size (:file-object file))
-                          :checksum checksum
-                          :type (db-asset/asset-path->type (:path file))
-                          :path (:path file)
-                          ;; Save array to avoid reading asset twice
-                          ::byte-array byte-array})
-                  byte-array)))))
-
-(defn- copy-asset [repo repo-dir asset-m]
-  (-> (::byte-array asset-m)
-      (p/then (fn [content]
-                (let [assets-dir (path/path-join repo-dir common-config/local-assets-dir)]
-                  (p/do!
-                   (fs/mkdir-if-not-exists assets-dir)
-                   (if (:block/uuid asset-m)
-                     (fs/write-plain-text-file! repo assets-dir (str (:block/uuid asset-m) "." (:type asset-m)) content {:skip-transact? true})
-                     (when-not (:pdf-annotation? asset-m)
-                       (println "Copied asset" (pr-str (node-path/basename (:path asset-m)))
-                                "by its name since it was unused.")
-                       (fs/write-plain-text-file! repo assets-dir (node-path/basename (:path asset-m)) content {:skip-transact? true})))))))))
+(defn- read-and-copy-asset [repo repo-dir file assets buffer-handler]
+  (let [^js file-object (:file-object file)]
+    (if (assets-handler/exceed-limit-size? file-object)
+      (js/console.error (str "Asset size shouldn't be larger than 100M, path: " (:path file)))
+      (p/let [buffer (.arrayBuffer file-object)
+              bytes-array (js/Uint8Array. buffer)
+              checksum (db-asset/<get-file-array-buffer-checksum buffer)
+              asset-id (d/squuid)
+              asset-name (gp-exporter/asset-path->name (:path file))
+              assets-dir (path/path-join repo-dir common-config/local-assets-dir)
+              asset-type (db-asset/asset-path->type (:path file))
+              {:keys [with-edn-content pdf-annotation?]} (buffer-handler bytes-array)]
+        (swap! assets assoc asset-name
+               (with-edn-content
+                 {:size (.-size file-object)
+                  :type asset-type
+                  :path (:path file)
+                  :checksum checksum
+                  :asset-id asset-id}))
+        (fs/mkdir-if-not-exists assets-dir)
+        (when-not pdf-annotation?
+          (fs/write-plain-text-file! repo assets-dir (str asset-id "." asset-type) bytes-array {:skip-transact? true}))))))
 
 (defn- import-file-graph
   [*files
@@ -404,8 +400,7 @@
                    :<save-logseq-file (fn save-logseq-file [_ path content]
                                         (db-editor-handler/save-file! path content))
                    ;; asset file options
-                   :<read-asset read-asset
-                   :<copy-asset #(copy-asset repo (config/get-repo-dir repo) %)
+                   :<read-and-copy-asset #(read-and-copy-asset repo (config/get-repo-dir repo) %1 %2 %3)
                    ;; doc file options
                    ;; Write to frontend first as writing to worker first is poor ux with slow streaming changes
                    :export-file (fn export-file [conn m opts]

+ 5 - 0
src/main/frontend/handler/assets.cljs

@@ -16,6 +16,11 @@
             [promesa.core :as p])
   (:import [missionary Cancelled]))
 
+(defn exceed-limit-size?
+  "Asset size no more than 100M"
+  [^js file]
+  (> (.-size file) (* 100 1024 1024)))
+
 (defn alias-enabled?
   []
   (and (util/electron?)

+ 2 - 2
src/main/frontend/handler/editor.cljs

@@ -1544,10 +1544,10 @@
                    dir repo-dir
                    asset (db/entity :logseq.class/Asset)]
 
-             (if (> (.-size file) (* 100 1024 1024)) ; 100m
+             (if (assets-handler/exceed-limit-size? file)
                (do
                  (notification/show! [:div "Asset size shouldn't be larger than 100M"]
-                                     :error
+                                     :warning
                                      false)
                  (throw (ex-info "Asset size shouldn't be larger than 100M" {:file-name file-name})))
                (p/let [properties {:logseq.property.asset/type ext