Browse Source

fix: importing assets for more complex blocks like paragraphs with links

Used extract-blocks to group ast blocks for each block instead of the
previous buggy version. Also renamed bindings that were all confusingly
named `block`.
Gabriel Horner 4 tháng trước cách đây
mục cha
commit
501063e625

+ 28 - 23
deps/graph-parser/src/logseq/graph_parser/block.cljs

@@ -132,7 +132,7 @@
     (when (some-> block-id parse-uuid)
       block-id)))
 
-(defn- paragraph-block?
+(defn paragraph-block?
   [block]
   (and
    (vector? block)
@@ -711,36 +711,36 @@
 
 (defn extract-blocks
   "Extract headings from mldoc ast. Args:
-  *`blocks`: mldoc ast.
-  *  `content`: markdown or org-mode text.
-  *  `format`: content's format, it could be either :markdown or :org-mode.
-  *  `options`: Options are :user-config, :block-pattern, :parse-block, :date-formatter, :db and
+  * `ast`: mldoc ast.
+  * `content`: markdown or org-mode text.
+  * `format`: content's format, it could be either :markdown or :org-mode.
+  * `options`: Options are :user-config, :block-pattern, :parse-block, :date-formatter, :db and
      * :db-graph-mode? : Set when a db graph in the frontend
      * :export-to-db-graph? : Set when exporting to a db graph"
-  [blocks content format {:keys [user-config db-graph-mode? export-to-db-graph?] :as options}]
-  {:pre [(seq blocks) (string? content) (contains? #{:markdown :org} format)]}
+  [ast content format {:keys [user-config db-graph-mode? export-to-db-graph?] :as options}]
+  {:pre [(seq ast) (string? content) (contains? #{:markdown :org} format)]}
   (let [encoded-content (utf8/encode content)
-        all-blocks (vec (reverse blocks))
+        all-blocks (vec (reverse ast))
         [blocks body pre-block-properties]
         (loop [headings []
-               blocks (reverse blocks)
+               ast-blocks (reverse ast)
                block-idx 0
                timestamps {}
                properties {}
                body []]
-          (if (seq blocks)
-            (let [[block pos-meta] (first blocks)]
+          (if (seq ast-blocks)
+            (let [[ast-block pos-meta] (first ast-blocks)]
               (cond
-                (paragraph-timestamp-block? block)
-                (let [timestamps (extract-timestamps block)
+                (paragraph-timestamp-block? ast-block)
+                (let [timestamps (extract-timestamps ast-block)
                       timestamps' (merge timestamps timestamps)]
-                  (recur headings (rest blocks) (inc block-idx) timestamps' properties body))
+                  (recur headings (rest ast-blocks) (inc block-idx) timestamps' properties body))
 
-                (gp-property/properties-ast? block)
-                (let [properties (extract-properties (second block) (assoc user-config :format format))]
-                  (recur headings (rest blocks) (inc block-idx) timestamps properties body))
+                (gp-property/properties-ast? ast-block)
+                (let [properties (extract-properties (second ast-block) (assoc user-config :format format))]
+                  (recur headings (rest ast-blocks) (inc block-idx) timestamps properties body))
 
-                (heading-block? block)
+                (heading-block? ast-block)
                 ;; for db-graphs cut multi-line when there is property, deadline/scheduled or logbook text in :block/title
                 (let [cut-multiline? (and export-to-db-graph?
                                           (when-let [prev-block (first (get all-blocks (dec block-idx)))]
@@ -762,14 +762,19 @@
                                       (and export-to-db-graph?
                                            (and (gp-property/properties-ast? (first (get all-blocks (dec block-idx))))
                                                 (= "Custom" (ffirst (get all-blocks (- block-idx 2)))))))
-                      block' (construct-block block properties timestamps body encoded-content format pos-meta' options')
-                      block'' (if (or db-graph-mode? export-to-db-graph?)
+                      block' (construct-block ast-block properties timestamps body encoded-content format pos-meta' options')
+                      block'' (cond
+                                db-graph-mode?
                                 block'
-                                (assoc block' :macros (extract-macros-from-ast (cons block body))))]
-                  (recur (conj headings block'') (rest blocks) (inc block-idx) {} {} []))
+                                export-to-db-graph?
+                                (assoc block' :block.temp/ast-blocks (cons ast-block body))
+                                :else
+                                (assoc block' :macros (extract-macros-from-ast (cons ast-block body))))]
+
+                  (recur (conj headings block'') (rest ast-blocks) (inc block-idx) {} {} []))
 
                 :else
-                (recur headings (rest blocks) (inc block-idx) timestamps properties (conj body block))))
+                (recur headings (rest ast-blocks) (inc block-idx) timestamps properties (conj body ast-block))))
             [(-> (reverse headings)
                  sanity-blocks-data)
              body

+ 31 - 41
deps/graph-parser/src/logseq/graph_parser/exporter.cljs

@@ -33,7 +33,8 @@
             [logseq.graph-parser.block :as gp-block]
             [logseq.graph-parser.extract :as extract]
             [logseq.graph-parser.property :as gp-property]
-            [promesa.core :as p]))
+            [promesa.core :as p]
+            [clojure.walk :as walk]))
 
 (defn- add-missing-timestamps
   "Add updated-at or created-at timestamps if they don't exist"
@@ -894,18 +895,27 @@
   [path]
   (re-find #"assets/.*$" path))
 
+(defn- find-all-asset-links
+  "Returns a vector of every local-asset Link node found in `ast-blocks`, walking to full
+   depth since Link asts can sit in different places e.g. a Heading vs a Paragraph ast block"
+  [ast-blocks]
+  (let [found (volatile! [])
+        asset-link? (fn [node]
+                      (and (vector? node)
+                           (= "Link" (first node))
+                           (common-config/local-asset? (-> node second :url second))))]
+    (walk/prewalk (fn [node]
+                    (when (asset-link? node) (vswap! found conj node))
+                    node)
+                  ast-blocks)
+    @found))
+
 (defn- handle-assets-in-block
-  [block block-ast {:keys [assets ignored-assets]}]
-  (let [asset-links
-        (->> block-ast
-             (mapcat (fn [n]
-                       (some->> n
-                                second
-                                :title
-                                (filter #(and (= "Link" (first %))
-                                              (common-config/local-asset? (second (:url (second %))))))))))
+  [block* {:keys [assets ignored-assets]}]
+  (let [block (dissoc block* :block.temp/ast-blocks)
+        asset-links (find-all-asset-links (:block.temp/ast-blocks block*))
         asset-link (first asset-links)
-        asset-name (some->> asset-link second :url second asset-path->name)]
+        asset-name (some-> asset-link second :url second asset-path->name)]
     (when (> (count asset-links) 1)
       (swap! ignored-assets into
              (map #(hash-map
@@ -919,7 +929,7 @@
           ;; Link to existing assets instead of creating duplicates to preserve identity
           (assoc block :block/title (page-ref/->page-ref (:block/uuid asset-data)))
           (do
-            (prn :asset-added! (node-path/basename asset-name) #_(get @assets asset-name))
+            ;; (prn :asset-added! (node-path/basename asset-name) #_(get @assets asset-name))
             ;; (cljs.pprint/pprint asset-link)
             (swap! assets assoc-in [asset-name :block/uuid] (:block/uuid block))
             (merge block
@@ -939,7 +949,7 @@
       block)))
 
 (defn- build-block-tx
-  [db block* pre-blocks {:keys [page-names-to-uuids block-to-ast] :as per-file-state} {:keys [import-state journal-created-ats] :as options}]
+  [db block* pre-blocks {:keys [page-names-to-uuids] :as per-file-state} {:keys [import-state journal-created-ats] :as options}]
   ;; (prn ::block-in block*)
   (let [;; needs to come before update-block-refs to detect new property schemas
         {:keys [block properties-tx]}
@@ -950,13 +960,12 @@
         prepared-block (cond-> block-after-built-in-props
                          journal-page-created-at
                          (assoc :block/created-at journal-page-created-at))
-        block-ast (get block-to-ast (:block/uuid block*))
         block' (-> prepared-block
                    (fix-pre-block-references pre-blocks page-names-to-uuids)
                    (fix-block-name-lookup-ref page-names-to-uuids)
                    (update-block-refs page-names-to-uuids options)
                    (update-block-tags db (:user-options options) per-file-state (:all-idents import-state))
-                   (handle-assets-in-block block-ast (select-keys import-state [:assets :ignored-assets]))
+                   (handle-assets-in-block (select-keys import-state [:assets :ignored-assets]))
                    (update-block-marker options)
                    (update-block-priority options)
                    add-missing-timestamps
@@ -1320,31 +1329,9 @@
              (update :block/refs fix-block-uuids {:ref? true :properties (:block/properties b)})))
          blocks)))
 
-(defn- extract-page [file content extract-options notify-user]
-  (let [{:keys [blocks ast] :as parsed-page}
-        (-> (extract/extract file content extract-options)
-            (update :pages (fn [pages]
-                             (map #(dissoc % :block.temp/original-page-name) pages)))
-            (update :blocks fix-extracted-block-tags-and-refs))
-        page-blocks (remove :block/pre-block? blocks)
-        ;; Like in gp-block/extract-blocks, only treat heading blocks as blocks
-        block-asts (filter (comp gp-block/heading-block? first) ast)
-        block-to-ast (if (= (count page-blocks) (count block-asts))
-                       (into {}
-                             (map vector
-                                  (map :block/uuid page-blocks)
-                                  block-asts))
-                       (do
-                         ;; Notify as this should be easy to fix and could lead to missing data
-                         (notify-user {:msg (str "Blocks and AST do not match for file " (pr-str file))})
-                         {}))]
-    (cond-> parsed-page
-      (some? block-to-ast)
-      (assoc :block-to-ast block-to-ast))))
-
 (defn- extract-pages-and-blocks
   "Main fn which calls graph-parser to convert markdown into data"
-  [db file content {:keys [extract-options import-state notify-user]}]
+  [db file content {:keys [extract-options import-state]}]
   (let [format (common-util/get-format file)
         ;; TODO: Remove once pdf highlights are supported
         ignored-highlight-file? (string/starts-with? (str (path/basename file)) "hls__")
@@ -1357,7 +1344,10 @@
                                 extract-options
                                 {:db db})]
     (cond (and (contains? common-config/mldoc-support-formats format) (not ignored-highlight-file?))
-          (extract-page file content extract-options' notify-user)
+          (-> (extract/extract file content extract-options')
+              (update :pages (fn [pages]
+                               (map #(dissoc % :block.temp/original-page-name) pages)))
+              (update :blocks fix-extracted-block-tags-and-refs))
 
           (common-config/whiteboard? file)
           (-> (extract/extract-whiteboard-edn file content extract-options')
@@ -1440,7 +1430,7 @@
                            log-fn prn}
                       :as *options}]
   (let [options (assoc *options :notify-user notify-user :log-fn log-fn)
-        {:keys [pages blocks block-to-ast]} (extract-pages-and-blocks @conn file content options)
+        {:keys [pages blocks]} (extract-pages-and-blocks @conn file content options)
         tx-options (merge (build-tx-options options)
                           {:journal-created-ats (build-journal-created-ats pages)})
         old-properties (keys @(get-in options [:import-state :property-schemas]))
@@ -1455,7 +1445,7 @@
         pre-blocks (->> blocks (keep #(when (:block/pre-block? %) (:block/uuid %))) set)
         blocks-tx (->> blocks
                        (remove :block/pre-block?)
-                       (mapcat #(build-block-tx @conn % pre-blocks (assoc per-file-state :block-to-ast block-to-ast)
+                       (mapcat #(build-block-tx @conn % pre-blocks per-file-state
                                                 (assoc tx-options :whiteboard? (some? (seq whiteboard-pages)))))
                        vec)
         {:keys [property-pages-tx property-page-properties-tx] pages-tx' :pages-tx}

+ 6 - 2
deps/graph-parser/test/logseq/graph_parser/exporter_test.cljs

@@ -118,7 +118,11 @@
                         {:user-options (merge {:convert-all-tags? false} (dissoc options :assets :verbose))
                         ;; asset file options
                          :<read-asset <read-asset-file
-                         :<copy-asset #(swap! assets conj %)}
+                         :<copy-asset (fn copy-asset [m]
+                                        (when-not (:block/uuid m)
+                                          (println "[INFO]" "Asset" (pr-str (node-path/basename (:path m)))
+                                                   "does not have a :block/uuid"))
+                                        (swap! assets conj m))}
                         (select-keys options [:verbose]))]
     (gp-exporter/export-file-graph conn conn config-file *files options')))
 
@@ -167,7 +171,7 @@
                 (remove #(= [{:db/ident :logseq.class/Tag}] (:block/tags %)))))
         "All classes only have :logseq.class/Tag as their tag (and don't have Page)")))
 
-(deftest-async ^:focus export-basic-graph-with-convert-all-tags
+(deftest-async export-basic-graph-with-convert-all-tags
   ;; This graph will contain basic examples of different features to import
   (p/let [file-graph-dir "test/resources/exporter-test-graph"
           conn (db-test/create-conn)

+ 4 - 1
deps/graph-parser/test/resources/exporter-test-graph/journals/2025_06_12.md

@@ -1,2 +1,5 @@
-- ![dino!](assets/subdir/partydino.gif){:width 105} tests an asset with a manual link, custom title and in a subdirectory
+- Test paragraph before an asset
+  
+  
+  ![dino!](assets/subdir/partydino.gif){:width 105} tests an asset with a manual link, custom title and in a subdirectory
 - ![greg-popovich-thumbs-up.png](../assets/greg-popovich-thumbs-up_1704749687791_0.png){:height 288, :width 252}