Browse Source

fix: support external pdf files when importing from file graphs

Tienson Qin 2 months ago
parent
commit
270f13f11a

+ 161 - 132
deps/graph-parser/src/logseq/graph_parser/exporter.cljs

@@ -901,7 +901,10 @@
        (cond
          (and (vector? x)
               (= "Link" (first x))
-              (common-config/local-relative-asset? (second (:url (second x)))))
+              (let [path (second (:url (second x)))]
+                (when (string? path)
+                  (or (common-config/local-relative-asset? path)
+                      (string/ends-with? path ".pdf")))))
          (swap! results update :asset-links conj x)
          (and (vector? x)
               (= "Macro" (first x))
@@ -1028,7 +1031,10 @@
   "Given an asset's relative or full path, create a unique name for identifying an asset.
    Must handle to paths as ../assets/*, assets/* and with subdirectories"
   [path]
-  (re-find #"assets/.*$" path))
+  (or (re-find #"assets/.*$" path)
+      ;; pdf outside logseq graphs
+      (when (string/ends-with? path ".pdf")
+        path)))
 
 (defn- update-asset-links-in-block-title [block-title asset-name-to-uuids ignored-assets]
   (reduce (fn [acc [asset-name asset-uuid]]
@@ -1131,7 +1137,9 @@
          {:block/tags [:logseq.class/Asset]
           :logseq.property.asset/type (:type asset-data)
           :logseq.property.asset/checksum (:checksum asset-data)
-          :logseq.property.asset/size (:size asset-data)}))
+          :logseq.property.asset/size (:size asset-data)}
+         (when-let [external-url (:external-url asset-data)]
+           {:logseq.property.asset/external-url external-url})))
 
 (defn- get-asset-block-id
   [assets path]
@@ -1180,54 +1188,72 @@
         (concat txs
                 (build-pdf-annotations-tx* asset-edn-map (get @pdf-annotation-pages asset-md-name) parent-asset image-asset-name-to-uuids opts))))))
 
-(defn- handle-assets-in-block
+(defn- <handle-assets-in-block
   "If a block contains assets, creates them as #Asset nodes in the Asset page and references them in the block."
-  [block {:keys [asset-links]} {:keys [assets ignored-assets pdf-annotation-pages]} {:keys [notify-user] :as opts}]
+  [block {:keys [asset-links]} {:keys [assets ignored-assets pdf-annotation-pages]} {:keys [notify-user <get-file-stat] :as opts}]
   (if (seq asset-links)
-    (let [asset-maps
-          (keep
-           (fn [asset-link]
-             (let [asset-name (-> asset-link second :url second asset-path->name)]
-               (if-let [asset-data (and asset-name (get @assets asset-name))]
-                 (cond
-                   (not (get-asset-block-id assets asset-name))
-                   (notify-user {:msg (str "Skipped creating asset " (pr-str asset-name) " because it has no asset id")
-                                 :level :error})

-                   ;; If asset tx is already built, no need to do it again
-                   (:asset-created? asset-data)
-                   {:asset-name-uuid [asset-name (:asset-id asset-data)]}

-                   :else
-                   (let [new-asset (merge (build-new-asset asset-data)
-                                          {:block/title (db-asset/asset-name->title (node-path/basename asset-name))
-                                           :block/uuid (get-asset-block-id assets asset-name)}
-                                          (when-let [metadata (not-empty (common-util/safe-read-map-string (:metadata (second asset-link))))]
-                                            {:logseq.property.asset/resize-metadata metadata}))
-                         pdf-annotations-tx (when (= "pdf" (path/file-ext asset-name))
-                                              (build-pdf-annotations-tx asset-name assets new-asset pdf-annotation-pages opts))
-                         asset-tx (concat [new-asset]
-                                          (when pdf-annotations-tx pdf-annotations-tx))]
-                    ;;  (prn :asset-added! (node-path/basename asset-name))
-                    ;;  (cljs.pprint/pprint asset-link)
-                     (swap! assets assoc-in [asset-name :asset-created?] true)
-                     {:asset-name-uuid [asset-name (:block/uuid new-asset)]
-                      :asset-tx asset-tx}))
-                 (do
-                   (swap! ignored-assets conj
-                          {:reason "No asset data found for this asset path"
-                           :path (-> asset-link second :url second)
-                           :location {:block (:block/title block)}})
-                   nil))))
-           asset-links)
-          asset-blocks (mapcat :asset-tx asset-maps)
-          asset-names-to-uuids
-          (into {} (map :asset-name-uuid asset-maps))]
+    ;; Handle links one at a time: each p/let binding resolves in its own step, so concurrent
+    ;; handlers (p/all) could both miss :asset-created? and build the same asset twice.
+    (p/let [asset-maps*
+            (p/loop [links asset-links acc []]
+              (if-let [asset-link (first links)]
+                (p/let [link-path (-> asset-link second :url second)
+                        asset-name (asset-path->name link-path)
+                        asset-data* (when asset-name (get @assets asset-name))
+                        _ (when (and asset-name
+                                     (not asset-data*)
+                                     (string/ends-with? link-path ".pdf")
+                                     (fn? <get-file-stat)) ; external pdf
+                            (-> (p/let [^js stat (<get-file-stat link-path)]
+                                  (when stat ; nil when no stat is available, e.g. outside electron
+                                    (swap! assets assoc asset-name
+                                           {:asset-id (d/squuid)
+                                            :type "pdf"
+                                            ;; avoid the real checksum since it could collide with an in-graph asset's
+                                            :checksum "0000000000000000000000000000000000000000000000000000000000000000"
+                                            :size (.-size stat)
+                                            :external-url link-path})))
+                                (p/catch (fn [error]
+                                           (js/console.error error)))))
+                        asset-data (when asset-name (get @assets asset-name))
+                        asset-map (if asset-data
+                                    (cond
+                                      (not (get-asset-block-id assets asset-name))
+                                      (notify-user {:msg (str "Skipped creating asset " (pr-str asset-name) " because it has no asset id")
+                                                    :level :error})

+                                      ;; If asset tx is already built, no need to do it again
+                                      (:asset-created? asset-data)
+                                      {:asset-name-uuid [asset-name (:asset-id asset-data)]}

+                                      :else
+                                      (let [new-asset (merge (build-new-asset asset-data)
+                                                             {:block/title (db-asset/asset-name->title (node-path/basename asset-name))
+                                                              :block/uuid (get-asset-block-id assets asset-name)}
+                                                             (when-let [metadata (not-empty (common-util/safe-read-map-string (:metadata (second asset-link))))]
+                                                               {:logseq.property.asset/resize-metadata metadata}))
+                                            pdf-annotations-tx (when (= "pdf" (path/file-ext asset-name))
+                                                                 (build-pdf-annotations-tx asset-name assets new-asset pdf-annotation-pages opts))
+                                            asset-tx (concat [new-asset] pdf-annotations-tx)]
+                                        (swap! assets assoc-in [asset-name :asset-created?] true)
+                                        {:asset-name-uuid [asset-name (:block/uuid new-asset)]
+                                         :asset-tx asset-tx}))
+                                    (do
+                                      (swap! ignored-assets conj
+                                             {:reason "No asset data found for this asset path"
+                                              :path link-path
+                                              :location {:block (:block/title block)}})
+                                      nil))]
+                  (p/recur (rest links) (conj acc asset-map)))
+                acc))
+            asset-maps (remove nil? asset-maps*)
+            asset-blocks (mapcat :asset-tx asset-maps)
+            asset-names-to-uuids (into {} (map :asset-name-uuid asset-maps))]
       (cond-> {:block
                (update block :block/title update-asset-links-in-block-title asset-names-to-uuids ignored-assets)}
         (seq asset-blocks)
         (assoc :asset-blocks-tx asset-blocks)))
-    {:block block}))
+    (p/resolved {:block block})))
 
 (defn- handle-quotes
   "If a block contains a quote, convert block to #Quote node"
@@ -1263,36 +1289,37 @@
         block))
     block))
 
-(defn- build-block-tx
+(defn- <build-block-tx ; async variant: returns a promise of the tx data built for one block
   [db block* pre-blocks {:keys [page-names-to-uuids] :as per-file-state} {:keys [import-state journal-created-ats] :as options}]
   ;; (prn ::block-in block*)
-  (let [walked-ast-blocks (walk-ast-blocks (:block.temp/ast-blocks block*))
+  (p/let [walked-ast-blocks (walk-ast-blocks (:block.temp/ast-blocks block*))
         ;; needs to come before update-block-refs to detect new property schemas
-        {:keys [block properties-tx]}
-        (handle-block-properties block* db page-names-to-uuids (:block/refs block*) walked-ast-blocks options)
-        {block-after-built-in-props :block deadline-properties-tx :properties-tx}
-        (update-block-deadline-and-scheduled block page-names-to-uuids options)
-        {block-after-assets :block :keys [asset-blocks-tx]}
-        (handle-assets-in-block block-after-built-in-props walked-ast-blocks import-state (select-keys options [:log-fn :notify-user]))
-        ;; :block/page should be [:block/page NAME]
-        journal-page-created-at (some-> (:block/page block*) second journal-created-ats)
-        prepared-block (cond-> block-after-assets
-                         journal-page-created-at
-                         (assoc :block/created-at journal-page-created-at))
-        block' (-> prepared-block
-                   (fix-pre-block-references pre-blocks page-names-to-uuids)
-                   (fix-block-name-lookup-ref page-names-to-uuids)
-                   (update-block-refs page-names-to-uuids options)
-                   (update-block-tags db (:user-options options) per-file-state (:all-idents import-state))
-                   (handle-embeds page-names-to-uuids walked-ast-blocks (select-keys options [:log-fn]))
-                   (handle-quotes (select-keys options [:log-fn]))
-                   (update-block-marker options)
-                   (update-block-priority options)
-                   add-missing-timestamps
+          {:keys [block properties-tx]}
+          (handle-block-properties block* db page-names-to-uuids (:block/refs block*) walked-ast-blocks options)
+          {block-after-built-in-props :block deadline-properties-tx :properties-tx}
+          (update-block-deadline-and-scheduled block page-names-to-uuids options)
+          {block-after-assets :block :keys [asset-blocks-tx]}
+          (<handle-assets-in-block block-after-built-in-props walked-ast-blocks import-state (select-keys options [:log-fn :notify-user :<get-file-stat])) ; returns a promise, awaited by p/let

+          ;; :block/page should be [:block/page NAME]; its second element is looked up in journal-created-ats
+          journal-page-created-at (some-> (:block/page block*) second journal-created-ats)
+          prepared-block (cond-> block-after-assets
+                           journal-page-created-at
+                           (assoc :block/created-at journal-page-created-at))
+          block' (-> prepared-block
+                     (fix-pre-block-references pre-blocks page-names-to-uuids)
+                     (fix-block-name-lookup-ref page-names-to-uuids)
+                     (update-block-refs page-names-to-uuids options)
+                     (update-block-tags db (:user-options options) per-file-state (:all-idents import-state))
+                     (handle-embeds page-names-to-uuids walked-ast-blocks (select-keys options [:log-fn]))
+                     (handle-quotes (select-keys options [:log-fn]))
+                     (update-block-marker options)
+                     (update-block-priority options)
+                     add-missing-timestamps
                    ;; old whiteboards may have :block/left
-                   (dissoc :block/left :block/format :block.temp/ast-blocks)
+                     (dissoc :block/left :block/format :block.temp/ast-blocks)
                   ;;  ((fn [x] (prn ::block-out x) x))
-                   )]
+                     )]
     ;; Order matters as previous txs are referenced in block
     (concat properties-tx deadline-properties-tx asset-blocks-tx [block'])))
 
@@ -1760,7 +1787,7 @@
   (when-let [nodes (seq (filter :block/name txs))]
     (swap! (:all-existing-page-uuids import-state) merge (into {} (map (juxt :block/uuid identity) nodes)))))
 
-(defn add-file-to-db-graph
+(defn <add-file-to-db-graph
   "Parse file and save parsed data to the given db graph. Options available:
 
 * :extract-options - Options map to pass to extract/extract
@@ -1775,63 +1802,65 @@
                       :or {notify-user #(println "[WARNING]" (:msg %))
                            log-fn prn}
                       :as *options}]
-  (let [options (assoc *options :notify-user notify-user :log-fn log-fn :file file)
-        {:keys [pages blocks]} (extract-pages-and-blocks @conn file content options)
-        tx-options (merge (build-tx-options options)
-                          {:journal-created-ats (build-journal-created-ats pages)})
-        old-properties (keys @(get-in options [:import-state :property-schemas]))
-        ;; Build page and block txs
-        {:keys [pages-tx page-properties-tx per-file-state existing-pages]} (build-pages-tx conn pages blocks tx-options)
-        whiteboard-pages (->> pages-tx
+  (p/let [options (assoc *options :notify-user notify-user :log-fn log-fn :file file)
+          {:keys [pages blocks]} (extract-pages-and-blocks @conn file content options)
+          tx-options (merge (build-tx-options options)
+                            {:journal-created-ats (build-journal-created-ats pages)})
+          old-properties (keys @(get-in options [:import-state :property-schemas]))
+          ;; Build page and block txs
+          {:keys [pages-tx page-properties-tx per-file-state existing-pages]} (build-pages-tx conn pages blocks tx-options)
+          whiteboard-pages (->> pages-tx
                               ;; support old and new whiteboards
-                              (filter ldb/whiteboard?)
-                              (map (fn [page-block]
-                                     (-> page-block
-                                         (assoc :logseq.property/ls-type :whiteboard-page)))))
-        pre-blocks (->> blocks (keep #(when (:block/pre-block? %) (:block/uuid %))) set)
-        blocks-tx (->> blocks
-                       (remove :block/pre-block?)
-                       (mapcat #(build-block-tx @conn % pre-blocks per-file-state
-                                                (assoc tx-options :whiteboard? (some? (seq whiteboard-pages)))))
-                       vec)
-        {:keys [property-pages-tx property-page-properties-tx] pages-tx' :pages-tx}
-        (split-pages-and-properties-tx pages-tx old-properties existing-pages (:import-state options) @(:upstream-properties tx-options))
-        ;; _ (when (seq property-pages-tx) (cljs.pprint/pprint {:property-pages-tx property-pages-tx}))
-        ;; Necessary to transact new property entities first so that block+page properties can be transacted next
-        main-props-tx-report (d/transact! conn property-pages-tx {::new-graph? true ::path file})
-        _ (save-from-tx property-pages-tx options)
-
-        classes-tx @(:classes-tx tx-options)
-        {:keys [retract-page-tags-tx] pages-tx'' :pages-tx} (clean-extra-invalid-tags @conn pages-tx' classes-tx existing-pages)
-        classes-tx' (concat classes-tx retract-page-tags-tx)
-        ;; Build indices
-        pages-index (->> (map #(select-keys % [:block/uuid]) pages-tx'')
-                         (concat (map #(select-keys % [:block/uuid]) classes-tx))
-                         distinct)
-        block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks-tx)
-        block-refs-ids (->> (mapcat :block/refs blocks-tx)
-                            (filter (fn [ref] (and (vector? ref)
-                                                   (= :block/uuid (first ref)))))
-                            (map (fn [ref] {:block/uuid (second ref)}))
-                            (seq))
-        ;; To prevent "unique constraint" on datascript
-        blocks-index (set/union (set block-ids) (set block-refs-ids))
-        ;; Order matters. pages-index and blocks-index needs to come before their corresponding tx for
-        ;; uuids to be valid. Also upstream-properties-tx comes after blocks-tx to possibly override blocks
-        tx (concat whiteboard-pages pages-index page-properties-tx property-page-properties-tx pages-tx'' classes-tx' blocks-index blocks-tx)
-        tx' (common-util/fast-remove-nils tx)
-        ;; (prn :tx-counts (map #(vector %1 (count %2))
-        ;;                        [:whiteboard-pages :pages-index :page-properties-tx :property-page-properties-tx :pages-tx' :classes-tx :blocks-index :blocks-tx]
-        ;;                        [whiteboard-pages pages-index page-properties-tx property-page-properties-tx pages-tx' classes-tx blocks-index blocks-tx]))
-        ;; _ (when (not (seq whiteboard-pages)) (cljs.pprint/pprint {#_:property-pages-tx #_property-pages-tx :pages-tx pages-tx :tx tx'}))
-        main-tx-report (d/transact! conn tx' {::new-graph? true ::path file})
-        _ (save-from-tx tx' options)
-
-        upstream-properties-tx
-        (build-upstream-properties-tx @conn @(:upstream-properties tx-options) (:import-state options) log-fn)
-        ;; _ (when (seq upstream-properties-tx) (cljs.pprint/pprint {:upstream-properties-tx upstream-properties-tx}))
-        upstream-tx-report (when (seq upstream-properties-tx) (d/transact! conn upstream-properties-tx {::new-graph? true ::path file}))
-        _ (save-from-tx upstream-properties-tx options)]
+                                (filter ldb/whiteboard?)
+                                (map (fn [page-block]
+                                       (-> page-block
+                                           (assoc :logseq.property/ls-type :whiteboard-page)))))
+          pre-blocks (->> blocks (keep #(when (:block/pre-block? %) (:block/uuid %))) set)
+          blocks-tx (p/loop [tx-data []
+                             blocks (remove :block/pre-block? blocks)]
+                      (if-let [block (first blocks)]
+                        (p/let [block-tx-data (<build-block-tx @conn block pre-blocks per-file-state
+                                                               (assoc tx-options :whiteboard? (some? (seq whiteboard-pages))))]
+                          (p/recur (into tx-data block-tx-data) (rest blocks))) ; eager vector: nesting a lazy concat per block can overflow the stack
+                        tx-data))
+          {:keys [property-pages-tx property-page-properties-tx] pages-tx' :pages-tx}
+          (split-pages-and-properties-tx pages-tx old-properties existing-pages (:import-state options) @(:upstream-properties tx-options))
+          ;; _ (when (seq property-pages-tx) (cljs.pprint/pprint {:property-pages-tx property-pages-tx}))
+          ;; Necessary to transact new property entities first so that block+page properties can be transacted next
+          main-props-tx-report (d/transact! conn property-pages-tx {::new-graph? true ::path file})
+          _ (save-from-tx property-pages-tx options)
+
+          classes-tx @(:classes-tx tx-options)
+          {:keys [retract-page-tags-tx] pages-tx'' :pages-tx} (clean-extra-invalid-tags @conn pages-tx' classes-tx existing-pages)
+          classes-tx' (concat classes-tx retract-page-tags-tx)
+          ;; Build indices
+          pages-index (->> (map #(select-keys % [:block/uuid]) pages-tx'')
+                           (concat (map #(select-keys % [:block/uuid]) classes-tx))
+                           distinct)
+          block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks-tx)
+          block-refs-ids (->> (mapcat :block/refs blocks-tx)
+                              (filter (fn [ref] (and (vector? ref)
+                                                     (= :block/uuid (first ref)))))
+                              (map (fn [ref] {:block/uuid (second ref)}))
+                              (seq))
+          ;; To prevent "unique constraint" on datascript
+          blocks-index (set/union (set block-ids) (set block-refs-ids))
+          ;; Order matters. pages-index and blocks-index needs to come before their corresponding tx for
+          ;; uuids to be valid. Also upstream-properties-tx comes after blocks-tx to possibly override blocks
+          tx (concat whiteboard-pages pages-index page-properties-tx property-page-properties-tx pages-tx'' classes-tx' blocks-index blocks-tx)
+          tx' (common-util/fast-remove-nils tx)
+          ;; (prn :tx-counts (map #(vector %1 (count %2))
+          ;;                        [:whiteboard-pages :pages-index :page-properties-tx :property-page-properties-tx :pages-tx' :classes-tx :blocks-index :blocks-tx]
+          ;;                        [whiteboard-pages pages-index page-properties-tx property-page-properties-tx pages-tx' classes-tx blocks-index blocks-tx]))
+          ;; _ (when (not (seq whiteboard-pages)) (cljs.pprint/pprint {#_:property-pages-tx #_property-pages-tx :pages-tx pages-tx :tx tx'}))
+          main-tx-report (d/transact! conn tx' {::new-graph? true ::path file})
+          _ (save-from-tx tx' options)
+
+          upstream-properties-tx
+          (build-upstream-properties-tx @conn @(:upstream-properties tx-options) (:import-state options) log-fn)
+          ;; _ (when (seq upstream-properties-tx) (cljs.pprint/pprint {:upstream-properties-tx upstream-properties-tx}))
+          upstream-tx-report (when (seq upstream-properties-tx) (d/transact! conn upstream-properties-tx {::new-graph? true ::path file}))
+          _ (save-from-tx upstream-properties-tx options)]
 
     ;; Return all tx-reports that occurred in this fn as UI needs to know what changed
     [main-props-tx-report main-tx-report upstream-tx-report]))
@@ -1841,17 +1870,17 @@
 
 (defn- export-doc-file
   [{:keys [path idx] :as file} conn <read-file
-   {:keys [notify-user set-ui-state export-file]
+   {:keys [notify-user set-ui-state <export-file]
     :or {set-ui-state (constantly nil)
-         export-file (fn export-file [conn m opts]
-                       (add-file-to-db-graph conn (:file/path m) (:file/content m) opts))}
+         <export-file (fn <export-file [conn m opts]
+                        (<add-file-to-db-graph conn (:file/path m) (:file/content m) opts))}
     :as options}]
   ;; (prn :export-doc-file path idx)
   (-> (p/let [_ (set-ui-state [:graph/importing-state :current-idx] (inc idx))
               _ (set-ui-state [:graph/importing-state :current-page] path)
               content (<read-file file)
               m {:file/path path :file/content content}]
-        (export-file conn m (dissoc options :set-ui-state :export-file))
+        (<export-file conn m (dissoc options :set-ui-state :<export-file))
         ;; returning val results in smoother ui updates
         m)
       (p/catch (fn [error]
@@ -2048,7 +2077,7 @@
        :user-options (merge {:remove-inline-tags? true :convert-all-tags? true} (:user-options options))
        :import-state (new-import-state)
        :macros (or (:macros options) (:macros config))}
-      (merge (select-keys options [:set-ui-state :export-file :notify-user]))))
+      (merge (select-keys options [:set-ui-state :<export-file :notify-user :<get-file-stat]))))
 
 (defn- move-top-parent-pages-to-library
   [conn repo-or-conn]

+ 9 - 5
src/main/frontend/components/imports.cljs

@@ -4,6 +4,7 @@
             [cljs.pprint :as pprint]
             [clojure.string :as string]
             [datascript.core :as d]
+            [electron.ipc :as ipc]
             [frontend.components.onboarding.setups :as setups]
             [frontend.components.repo :as repo]
             [frontend.components.svg :as svg]
@@ -397,6 +398,9 @@
                    :notify-user show-notification
                    :set-ui-state state/set-state!
                    :<read-file (fn <read-file [file] (.text (:file-object file)))
+                   :<get-file-stat (fn <get-file-stat [path]
+                                     (when (util/electron?)
+                                       (ipc/ipc :stat path)))
                    ;; config file options
                    :default-config config/config-default-content
                    :<save-config-file (fn save-config-file [_ path content]
@@ -408,11 +412,11 @@
                    :<read-and-copy-asset #(read-and-copy-asset repo (config/get-repo-dir repo) %1 %2 %3)
                    ;; doc file options
                    ;; Write to frontend first as writing to worker first is poor ux with slow streaming changes
-                   :export-file (fn export-file [conn m opts]
-                                  (let [tx-reports
-                                        (gp-exporter/add-file-to-db-graph conn (:file/path m) (:file/content m) opts)]
-                                    (doseq [tx-report tx-reports]
-                                      (db-browser/transact! repo (:tx-data tx-report) (:tx-meta tx-report)))))}
+                   :<export-file (fn <export-file [conn m opts]
+                                   (p/let [tx-reports
+                                           (gp-exporter/<add-file-to-db-graph conn (:file/path m) (:file/content m) opts)]
+                                     (doseq [tx-report tx-reports]
+                                       (db-browser/transact! repo (:tx-data tx-report) (:tx-meta tx-report)))))}
           {:keys [files import-state]} (gp-exporter/export-file-graph repo db-conn config-file *files options)]
     (log/info :import-file-graph {:msg (str "Import finished in " (/ (t/in-millis (t/interval start-time (t/now))) 1000) " seconds")})
     (state/set-state! :graph/importing nil)