Browse Source

Merge branch 'master' into enhance/mobile-silk

charlie 4 months ago
parent
commit
d63d5dce09

+ 2 - 1
deps/graph-parser/package.json

@@ -7,7 +7,8 @@
     "better-sqlite3": "11.10.0"
   },
   "dependencies": {
-    "mldoc": "^1.5.9"
+    "mldoc": "^1.5.9",
+    "sanitize-filename": "1.6.3"
   },
   "scripts": {
     "test": "nbb-logseq -cp test:../outliner/src -m nextjournal.test-runner",

+ 3 - 2
deps/graph-parser/script/db_import.cljs

@@ -57,14 +57,15 @@
            {:size (.-length buffer)
             :checksum checksum
             :type (db-asset/asset-path->type (:path file))
-            :path (:path file)})))
+            :path (:path file)})
+    buffer))
 
 (defn- <copy-asset-file [asset-m db-graph-dir]
   (p/let [parent-dir (node-path/join db-graph-dir common-config/local-assets-dir)
           _ (fsp/mkdir parent-dir #js {:recursive true})]
     (if (:block/uuid asset-m)
       (fsp/copyFile (:path asset-m) (node-path/join parent-dir (str (:block/uuid asset-m) "." (:type asset-m))))
-      (do
+      (when-not (:pdf-annotation? asset-m)
         (println "[INFO]" "Copied asset" (pr-str (node-path/basename (:path asset-m)))
                  "by its name since it was unused.")
         (fsp/copyFile (:path asset-m) (node-path/join parent-dir (node-path/basename (:path asset-m))))))))

+ 187 - 50
deps/graph-parser/src/logseq/graph_parser/exporter.cljs

@@ -2,6 +2,7 @@
   "Exports a file graph to DB graph. Used by the File to DB graph importer and
   by nbb-logseq CLIs"
   (:require ["path" :as node-path]
+            ["sanitize-filename" :as sanitizeFilename]
             [borkdude.rewrite-edn :as rewrite]
             [cljs-time.coerce :as tc]
             [cljs.pprint]
@@ -35,6 +36,7 @@
             [logseq.graph-parser.block :as gp-block]
             [logseq.graph-parser.extract :as extract]
             [logseq.graph-parser.property :as gp-property]
+            [logseq.graph-parser.utf8 :as utf8]
             [promesa.core :as p]))
 
 (defn- add-missing-timestamps
@@ -1005,9 +1007,137 @@
           block-title
           asset-name-to-uuids))
 
+(defn find-annotation-children-blocks
+  "Given a list of blocks and a set of parent uuids, return all blocks that are
+   descendants via :block/parent of given parent uuids"
+  [blocks parent-uuids]
+  (let [get-descendant-uuids
+        (fn get-descendant-uuids [acc-uuids seen]
+          (let [new-blocks (filter #(contains? acc-uuids (second (:block/parent %))) blocks)
+                new-uuids  (set (map :block/uuid new-blocks))
+                unseen     (set/difference new-uuids seen)]
+            (if (empty? unseen)
+              seen
+              (recur unseen (set/union seen unseen)))))
+        parent-and-descendant-uuids (get-descendant-uuids parent-uuids parent-uuids)
+        only-descendants (set/difference parent-and-descendant-uuids parent-uuids)]
+    (filter #(contains? only-descendants (:block/uuid %)) blocks)))
+
+(defn- build-annotation-block
+  [m color-text-idents parent-asset image-asset-name-to-uuids md-blocks {:keys [log-fn] :or {log-fn prn}}]
+  (let [user-attributes
+        {:logseq.property.pdf/hl-color (get color-text-idents (get-in m [:properties :color]))
+         :logseq.property.pdf/hl-page (:page m)
+         :block/title (get-in m [:content :text])}
+        _ (when (some (comp nil? val) user-attributes)
+            (log-fn :missing-annotation-attributes "Annotation is missing some attributes so set reasonable defaults for them"
+                    {:annotation user-attributes :asset (:block/title parent-asset)}))
+        asset-image-uuid (some (fn [[asset-name image-uuid]]
+                                 (when (string/includes? asset-name
+                                                         (str (:id m)
+                                                              (when (get-in m [:content :image])
+                                                                (str "_" (get-in m [:content :image])))))
+                                   image-uuid))
+                               image-asset-name-to-uuids)
+        md-block (get md-blocks (:id m))
+        annotation (merge
+                     ;; Reasonable defaults for user attributes
+                    {:logseq.property.pdf/hl-color :logseq.property/color.yellow
+                     :logseq.property.pdf/hl-page 1
+                     :block/title ""}
+                    user-attributes
+                    {:block/uuid (:id m)
+                     :block/order (db-order/gen-key)
+                     :logseq.property/ls-type :annotation
+                     :logseq.property.pdf/hl-value m
+                     :logseq.property/asset [:block/uuid (:block/uuid parent-asset)]
+                     :block/tags [:logseq.class/Pdf-annotation]
+                     :block/parent [:block/uuid (:block/uuid parent-asset)]
+                     :block/page :logseq.class/Asset}
+                    (when asset-image-uuid
+                      {:logseq.property.pdf/hl-image [:block/uuid asset-image-uuid]
+                       :logseq.property.pdf/hl-type :area})
+                    (when md-block
+                      (select-keys md-block [:block/title])))]
+    (sqlite-util/block-with-timestamps annotation)))
+
+(defn- build-pdf-annotations-tx*
+  "Creates annotations for a pdf asset given the asset's edn map and parsed markdown file"
+  [asset-edn-map parsed-md parent-asset image-asset-name-to-uuids opts]
+  (let [color-text-idents
+        (->> (get-in db-property/built-in-properties [:logseq.property.pdf/hl-color :closed-values])
+             (map (juxt :value :db-ident))
+             (into {}))
+        md-blocks
+        (->> parsed-md
+             :blocks
+             ;; Currently we can only import text of any md annotation blocks. No tags or properties
+             (map #(vector (:block/uuid %)
+                           (select-keys % [:block/title :block/order :block/parent :block/uuid])))
+             (into {}))
+        annotation-blocks
+        (mapv #(build-annotation-block % color-text-idents parent-asset image-asset-name-to-uuids md-blocks opts)
+              (get-in asset-edn-map [:edn-content :highlights]))
+        md-children-blocks*
+        (find-annotation-children-blocks (vals md-blocks) (set (map :id (get-in asset-edn-map [:edn-content :highlights]))))
+        md-children-blocks (keep #(sqlite-util/block-with-timestamps (merge % {:block/page :logseq.class/Asset}))
+                                 md-children-blocks*)]
+    (into annotation-blocks md-children-blocks)))
+
+(defn- build-new-asset [asset-data]
+  (merge (sqlite-util/block-with-timestamps
+          {:block/uuid (d/squuid)
+           :block/order (db-order/gen-key)
+           :block/page :logseq.class/Asset
+           :block/parent :logseq.class/Asset})
+         {:block/tags [:logseq.class/Asset]
+          :logseq.property.asset/type (:type asset-data)
+          :logseq.property.asset/checksum (:checksum asset-data)
+          :logseq.property.asset/size (:size asset-data)}))
+
+(defn- build-annotation-images
+  "Builds tx for annotation images and provides a map for mapping image asset names
+   to their new uuids"
+  [parent-asset-path assets]
+  (let [image-dir (string/replace-first parent-asset-path #"(?i)\.pdf$" "")
+        image-paths (filter #(= image-dir (node-path/dirname %)) (keys @assets))
+        txs (mapv #(let [new-asset (merge (build-new-asset (get @assets %))
+                                          {:block/title "pdf area highlight"})]
+                     (swap! assets assoc-in [% :block/uuid] (:block/uuid new-asset))
+                     new-asset)
+                  image-paths)]
+    {:txs txs
+     :image-asset-name-to-uuids
+     (->> (map (fn [image-path tx]
+                 [(node-path/basename image-path) (:block/uuid tx)]) image-paths txs)
+          (into {}))}))
+
+;; Reference same default class in cljs + nbb without needing .cljc
+(def sanitizeFilename' (if (find-ns 'nbb.core) (aget sanitizeFilename "default") sanitizeFilename))
+
+(defn safe-sanitize-file-name
+  "Sanitizes filenames for pdf assets"
+  [s]
+  (sanitizeFilename' (str s)))
+
+(defn- build-pdf-annotations-tx
+  "Builds tx for pdf annotations when a pdf has an annotations EDN file under assets/"
+  [parent-asset-path assets parent-asset pdf-annotation-pages opts]
+  (let [asset-edn-path (node-path/join common-config/local-assets-dir
+                                       (safe-sanitize-file-name
+                                        (node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".edn"))))
+        asset-md-name (str "hls__" (safe-sanitize-file-name
+                                    (node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".md"))))]
+    (when-let [asset-edn-map (get @assets asset-edn-path)]
+      ;; Mark edn asset so it isn't treated like a normal asset later
+      (swap! assets assoc-in [asset-edn-path :pdf-annotation?] true)
+      (let [{:keys [txs image-asset-name-to-uuids]} (build-annotation-images parent-asset-path assets)]
+        (concat txs
+                (build-pdf-annotations-tx* asset-edn-map (get @pdf-annotation-pages asset-md-name) parent-asset image-asset-name-to-uuids opts))))))
+
 (defn- handle-assets-in-block
   "If a block contains assets, creates them as #Asset nodes in the Asset page and references them in the block."
-  [block {:keys [asset-links]} {:keys [assets ignored-assets]}]
+  [block {:keys [asset-links]} {:keys [assets ignored-assets pdf-annotation-pages]} opts]
   (if (seq asset-links)
     (let [asset-maps
           (keep
@@ -1016,24 +1146,19 @@
                (if-let [asset-data (and asset-name (get @assets asset-name))]
                  (if (:block/uuid asset-data)
                    {:asset-name-uuid [asset-name (:block/uuid asset-data)]}
-                   (let [new-block (sqlite-util/block-with-timestamps
-                                    {:block/uuid (d/squuid)
-                                     :block/order (db-order/gen-key)
-                                     :block/page :logseq.class/Asset
-                                     :block/parent :logseq.class/Asset})
-                         new-asset (merge new-block
-                                          {:block/tags [:logseq.class/Asset]
-                                           :logseq.property.asset/type (:type asset-data)
-                                           :logseq.property.asset/checksum (:checksum asset-data)
-                                           :logseq.property.asset/size (:size asset-data)
-                                           :block/title (db-asset/asset-name->title (node-path/basename asset-name))}
+                   (let [new-asset (merge (build-new-asset asset-data)
+                                          {:block/title (db-asset/asset-name->title (node-path/basename asset-name))}
                                           (when-let [metadata (not-empty (common-util/safe-read-map-string (:metadata (second asset-link))))]
-                                            {:logseq.property.asset/resize-metadata metadata}))]
-                      ;;  (prn :asset-added! (node-path/basename asset-name) #_(get @assets asset-name))
-                      ;;  (cljs.pprint/pprint asset-link)
-                     (swap! assets assoc-in [asset-name :block/uuid] (:block/uuid new-block))
+                                            {:logseq.property.asset/resize-metadata metadata}))
+                         pdf-annotations-tx (when (= "pdf" (path/file-ext asset-name))
+                                              (build-pdf-annotations-tx asset-name assets new-asset pdf-annotation-pages opts))
+                         asset-tx (concat [new-asset]
+                                          (when pdf-annotations-tx pdf-annotations-tx))]
+                    ;;  (prn :asset-added! (node-path/basename asset-name))
+                    ;;  (cljs.pprint/pprint asset-link)
+                     (swap! assets assoc-in [asset-name :block/uuid] (:block/uuid new-asset))
                      {:asset-name-uuid [asset-name (:block/uuid new-asset)]
-                      :asset new-asset}))
+                      :asset-tx asset-tx}))
                  (do
                    (swap! ignored-assets conj
                           {:reason "No asset data found for this asset path"
@@ -1041,7 +1166,7 @@
                            :location {:block (:block/title block)}})
                    nil))))
            asset-links)
-          asset-blocks (keep :asset asset-maps)
+          asset-blocks (mapcat :asset-tx asset-maps)
           asset-names-to-uuids
           (into {} (map :asset-name-uuid asset-maps))]
       (cond-> {:block
@@ -1094,7 +1219,7 @@
         {block-after-built-in-props :block deadline-properties-tx :properties-tx}
         (update-block-deadline-and-scheduled block page-names-to-uuids options)
         {block-after-assets :block :keys [asset-blocks-tx]}
-        (handle-assets-in-block block-after-built-in-props walked-ast-blocks (select-keys import-state [:assets :ignored-assets]))
+        (handle-assets-in-block block-after-built-in-props walked-ast-blocks import-state (select-keys options [:log-fn]))
         ;; :block/page should be [:block/page NAME]
         journal-page-created-at (some-> (:block/page block*) second journal-created-ats)
         prepared-block (cond-> block-after-assets
@@ -1112,7 +1237,7 @@
                    add-missing-timestamps
                    ;; old whiteboards may have :block/left
                    (dissoc :block/left :block/format :block.temp/ast-blocks)
-                  ;;  ((fn [x] (prn :block-out x) x))
+                  ;;  ((fn [x] (prn ::block-out x) x))
                    )]
     ;; Order matters as previous txs are referenced in block
     (concat properties-tx deadline-properties-tx asset-blocks-tx [block'])))
@@ -1350,6 +1475,8 @@
    :ignored-files (atom [])
    ;; Vec of maps with keys :path, :reason and :location (optional).
    :ignored-assets (atom [])
+   ;; Map of annotation page paths and their parsed contents
+   :pdf-annotation-pages (atom {})
    ;; Map of property names (keyword) and their current schemas (map of qualified properties).
    ;; Used for adding schemas to properties and detecting changes across a property's usage
    :property-schemas (atom {})
@@ -1492,8 +1619,6 @@
   "Main fn which calls graph-parser to convert markdown into data"
   [db file content {:keys [extract-options import-state]}]
   (let [format (common-util/get-format file)
-        ;; TODO: Remove once pdf highlights are supported
-        ignored-highlight-file? (string/starts-with? (str (path/basename file)) "hls__")
         extract-options' (merge {:block-pattern (common-config/get-block-pattern format)
                                  :date-formatter "MMM do, yyyy"
                                  :uri-encoded? false
@@ -1501,30 +1626,34 @@
                                  :export-to-db-graph? true
                                  :filename-format :legacy}
                                 extract-options
-                                {:db db})]
-    (cond (and (contains? common-config/mldoc-support-formats format) (not ignored-highlight-file?))
-          (-> (extract/extract file content extract-options')
-              (update :pages (fn [pages]
-                               (map #(dissoc % :block.temp/original-page-name) pages)))
-              (update :blocks fix-extracted-block-tags-and-refs))
-
-          (common-config/whiteboard? file)
-          (-> (extract/extract-whiteboard-edn file content extract-options')
-              (update :pages (fn [pages]
-                               (->> pages
-                                    ;; migrate previous attribute for :block/title
-                                    (map #(-> %
-                                              (assoc :block/title (or (:block/original-name %) (:block/title %))
-                                                     :block/tags #{:logseq.class/Whiteboard})
-                                              (dissoc :block/type :block/original-name))))))
-              (update :blocks update-whiteboard-blocks format))
+                                {:db db})
+        extracted
+        (cond (contains? common-config/mldoc-support-formats format)
+              (-> (extract/extract file content extract-options')
+                  (update :pages (fn [pages]
+                                   (map #(dissoc % :block.temp/original-page-name) pages)))
+                  (update :blocks fix-extracted-block-tags-and-refs))
+
+              (common-config/whiteboard? file)
+              (-> (extract/extract-whiteboard-edn file content extract-options')
+                  (update :pages (fn [pages]
+                                   (->> pages
+                                        ;; migrate previous attribute for :block/title
+                                        (map #(-> %
+                                                  (assoc :block/title (or (:block/original-name %) (:block/title %))
+                                                         :block/tags #{:logseq.class/Whiteboard})
+                                                  (dissoc :block/type :block/original-name))))))
+                  (update :blocks update-whiteboard-blocks format))
 
-          :else
-          (if ignored-highlight-file?
-            (swap! (:ignored-files import-state) conj
-                   {:path file :reason :pdf-highlight})
-            (swap! (:ignored-files import-state) conj
-                   {:path file :reason :unsupported-file-format})))))
+              :else
+              (swap! (:ignored-files import-state) conj
+                     {:path file :reason :unsupported-file-format}))]
+    ;; Annotation markdown pages are saved for later as they are dependant on the asset being annotated
+    (if (string/starts-with? (str (path/basename file)) "hls__")
+      (do
+        (swap! (:pdf-annotation-pages import-state) assoc (node-path/basename file) extracted)
+        nil)
+      extracted)))
 
 (defn- build-journal-created-ats
   "Calculate created-at timestamps for journals"
@@ -1683,7 +1812,10 @@
   (set-ui-state [:graph/importing-state :total] (count *doc-files))
   (let [doc-files (mapv #(assoc %1 :idx %2)
                         ;; Sort files to ensure reproducible import behavior
-                        (sort-by :path *doc-files)
+                        ;; pdf annotation pages sort first because other pages depend on them
+                        (sort-by (fn [{:keys [path]}]
+                                   [(not (string/starts-with? (node-path/basename path) "hls__")) path])
+                                 *doc-files)
                         (range 0 (count *doc-files)))]
     (-> (p/loop [_file-map (export-doc-file (get doc-files 0) conn <read-file options)
                  i 0]
@@ -1793,7 +1925,11 @@
                           (sort-by :path *asset-files)
                           (range 0 (count *asset-files)))
         read-asset (fn read-asset [{:keys [path] :as file}]
-                     (-> (<read-asset-file file assets)
+                     (-> (p/let [byte-array (<read-asset-file file assets)]
+                           (when (= "edn" (path/file-ext (:path file)))
+                             (swap! assets assoc-in
+                                    [(asset-path->name path) :edn-content]
+                                    (common-util/safe-read-map-string (utf8/decode byte-array)))))
                          (p/catch
                           (fn [error]
                             (notify-user {:msg (str "Import failed to read " (pr-str path) " with error:\n" (.-message error))
@@ -1916,10 +2052,11 @@
                        (set/rename-keys {:<save-config-file :<save-file})))]
      (let [files (common-config/remove-hidden-files *files config rpath-key)
            logseq-file? #(string/starts-with? (get % rpath-key) "logseq/")
+           asset-file? #(string/starts-with? (get % rpath-key) "assets/")
            doc-files (->> files
-                          (remove logseq-file?)
+                          (remove #(or (logseq-file? %) (asset-file? %)))
                           (filter #(contains? #{"md" "org" "markdown" "edn"} (path/file-ext (:path %)))))
-           asset-files (filter #(string/starts-with? (get % rpath-key) "assets/") files)
+           asset-files (filter asset-file? files)
            doc-options (build-doc-options config options)]
        (log-fn "Importing" (count doc-files) "files ...")
        ;; These export* fns are all the major export/import steps
@@ -1928,7 +2065,7 @@
                              (-> (select-keys options [:notify-user :<save-logseq-file])
                                  (set/rename-keys {:<save-logseq-file :<save-file})))
         ;; Assets are read first as doc-files need data from them to make Asset blocks.
-        ;; Assets are copied after after doc-files as they need block/uuid's from them to name assets
+        ;; Assets are copied after doc-files as they need block/uuid's from them to name assets
         (read-asset-files asset-files <read-asset (merge (select-keys options [:notify-user :set-ui-state])
                                                          {:assets (get-in doc-options [:import-state :assets])}))
         (export-doc-files conn doc-files <read-file doc-options)

+ 6 - 6
deps/graph-parser/src/logseq/graph_parser/text.cljs

@@ -1,13 +1,13 @@
 (ns logseq.graph-parser.text
   "Miscellaneous text util fns for the parser. Used by file and DB graphs"
-  (:require [goog.string :as gstring]
+  (:require [clojure.set :as set]
             [clojure.string :as string]
-            [clojure.set :as set]
-            [logseq.graph-parser.property :as gp-property]
-            [logseq.graph-parser.mldoc :as gp-mldoc]
+            [goog.string :as gstring]
             [logseq.common.util :as common-util]
+            [logseq.common.util.namespace :as ns-util]
             [logseq.common.util.page-ref :as page-ref]
-            [logseq.common.util.namespace :as ns-util]))
+            [logseq.graph-parser.mldoc :as gp-mldoc]
+            [logseq.graph-parser.property :as gp-property]))
 
 (def get-file-basename page-ref/get-file-basename)
 
@@ -151,4 +151,4 @@
             v'))))))
 
 (def namespace-page? ns-util/namespace-page?)
-(def get-namespace-last-part ns-util/get-last-part)
+(def get-namespace-last-part ns-util/get-last-part)

+ 37 - 8
deps/graph-parser/test/logseq/graph_parser/exporter_test.cljs

@@ -103,7 +103,8 @@
            {:size (.-length buffer)
             :checksum checksum
             :type (db-asset/asset-path->type (:path file))
-            :path (:path file)})))
+            :path (:path file)})
+    buffer))
 
 ;; Copied from db-import script and tweaked for an in-memory import
 (defn- import-file-graph-to-db
@@ -119,10 +120,11 @@
                         ;; asset file options
                          :<read-asset <read-asset-file
                          :<copy-asset (fn copy-asset [m]
-                                        (when-not (:block/uuid m)
-                                          (println "[INFO]" "Asset" (pr-str (node-path/basename (:path m)))
-                                                   "does not have a :block/uuid"))
-                                        (swap! assets conj m))}
+                                        (if (:block/uuid m)
+                                          (swap! assets conj m)
+                                          (when-not (:pdf-annotation? m)
+                                            (println "[INFO]" "Asset" (pr-str (node-path/basename (:path m)))
+                                                     "does not have a :block/uuid"))))}
                         (select-keys options [:verbose]))]
     (gp-exporter/export-file-graph conn conn config-file *files options')))
 
@@ -206,13 +208,14 @@
 
       ;; Counts
       ;; Includes journals as property values e.g. :logseq.property/deadline
-      (is (= 27 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Journal]] @conn))))
+      (is (= 29 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Journal]] @conn))))
 
-      (is (= 3 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Asset]] @conn))))
+      (is (= 5 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Asset]] @conn))))
       (is (= 4 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Task]] @conn))))
       (is (= 4 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Query]] @conn))))
       (is (= 2 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Card]] @conn))))
       (is (= 3 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Quote-block]] @conn))))
+      (is (= 2 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Pdf-annotation]] @conn))))
 
       ;; Properties and tags aren't included in this count as they aren't a Page
       (is (= 10
@@ -235,7 +238,7 @@
       (is (= 0 (count @(:ignored-properties import-state))) "No ignored properties")
       (is (= 0 (count @(:ignored-assets import-state))) "No ignored assets")
       (is (= 1 (count @(:ignored-files import-state))) "Ignore .edn for now")
-      (is (= 3 (count @assets))))
+      (is (= 5 (count @assets))))
 
     (testing "logseq files"
       (is (= ".foo {}\n"
@@ -418,6 +421,32 @@
       (is (= (d/entity @conn :logseq.class/Asset)
              (:block/page (db-test/find-block-by-content @conn "greg-popovich-thumbs-up_1704749687791_0")))
           "Imported into Asset page")
+
+      ;; Annotations
+      (is (= {:logseq.property.pdf/hl-color :logseq.property/color.blue
+              :logseq.property.pdf/hl-page 8
+              :block/tags [:logseq.class/Pdf-annotation]
+              :logseq.property/asset "Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0"}
+             (dissoc (db-test/readable-properties (db-test/find-block-by-content @conn #"Duke School - modified"))
+                     :logseq.property.pdf/hl-value :logseq.property/ls-type))
+          "Pdf text highlight has correct properties")
+      (is (= ["note about duke" "sub note"]
+             (mapv :block/title (rest (ldb/get-block-and-children @conn (:block/uuid (db-test/find-block-by-content @conn #"Duke School - modified"))))))
+          "Pdf text highlight has correct children blocks")
+      (is (= {:logseq.property.pdf/hl-color :logseq.property/color.yellow
+              :logseq.property.pdf/hl-page 1
+              :block/tags [:logseq.class/Pdf-annotation]
+              :logseq.property/asset "Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0"
+              :logseq.property.pdf/hl-image "pdf area highlight"
+              :logseq.property.pdf/hl-type :area}
+             (dissoc (->> (d/q '[:find [?b ...]
+                                 :where [?b :block/tags :logseq.class/Pdf-annotation] [?b :block/title ""]] @conn)
+                          first
+                          (d/entity @conn)
+                          db-test/readable-properties)
+                     :logseq.property.pdf/hl-value :logseq.property/ls-type))
+          "Pdf area highlight has correct properties")
+
       ;; Quotes
       (is (= {:block/tags [:logseq.class/Quote-block]
               :logseq.property.node/display-type :quote}

+ 30 - 0
deps/graph-parser/test/resources/exporter-test-graph/assets/Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.edn

@@ -0,0 +1,30 @@
+{:highlights [{:id #uuid "687022ae-dac1-42a6-9d4c-39a0dba05918",
+               :page 1,
+               :position {:bounding {:x1 131,
+                                     :y1 336,
+                                     :x2 483,
+                                     :y2 399,
+                                     :width 574.0000000000001,
+                                     :height 573.999986224},
+                          :rects (),
+                          :page 1},
+               :content {:text "", :image 1752179374600},
+               :properties {:color "yellow"}}
+              {:id #uuid "68702394-3613-4bac-85a7-28643d58237f",
+               :page 8,
+               :position {:bounding {:x1 10.680589094758034,
+                                     :y1 183.2645263671875,
+                                     :x2 119.76637782156467,
+                                     :y2 204.954345703125,
+                                     :width 574.0000000000001,
+                                     :height 573.999986224},
+                          :rects ({:x1 10.680589094758034,
+                                   :y1 183.2645263671875,
+                                   :x2 119.76637782156467,
+                                   :y2 204.954345703125,
+                                   :width 574.0000000000001,
+                                   :height 573.999986224}),
+                          :page 8},
+               :content {:text "Duke School"},
+               :properties {:color "blue"}}],
+ :extra {:page 2}}

BIN
deps/graph-parser/test/resources/exporter-test-graph/assets/Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.pdf


BIN
deps/graph-parser/test/resources/exporter-test-graph/assets/Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0/1_687022ae-dac1-42a6-9d4c-39a0dba05918_1752179374600.png


+ 1 - 0
deps/graph-parser/test/resources/exporter-test-graph/journals/2025_07_10.md

@@ -0,0 +1 @@
+- ![Sina de Capoeria Batizado 2025 - Program Itinerary.pdf](../assets/Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.pdf)

+ 1 - 0
deps/graph-parser/test/resources/exporter-test-graph/journals/2025_07_15.md

@@ -0,0 +1 @@
+- Test ref to an annotation: ((68702394-3613-4bac-85a7-28643d58237f))

+ 16 - 0
deps/graph-parser/test/resources/exporter-test-graph/pages/hls__Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.md

@@ -0,0 +1,16 @@
+file:: [Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.pdf](../assets/Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.pdf)
+file-path:: ../assets/Sina_de_Capoeria_Batizado_2025_-_Program_Itinerary_1752179325104_0.pdf
+
+- id:: 687022ae-dac1-42a6-9d4c-39a0dba05918
+  ls-type:: annotation
+  hl-page:: 1
+  hl-color:: yellow
+  hl-type:: area
+  hl-stamp:: 1752179374600
+- Duke School - modified
+  hl-page:: 8
+  ls-type:: annotation
+  id:: 68702394-3613-4bac-85a7-28643d58237f
+  hl-color:: blue
+	- note about duke
+		- sub note

+ 19 - 0
deps/graph-parser/yarn.lock

@@ -425,6 +425,13 @@ safe-buffer@^5.0.1, safe-buffer@~5.2.0:
   resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
   integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
 
[email protected]:
+  version "1.6.3"
+  resolved "https://registry.yarnpkg.com/sanitize-filename/-/sanitize-filename-1.6.3.tgz#755ebd752045931977e30b2025d340d7c9090378"
+  integrity sha512-y/52Mcy7aw3gRm7IrcGDFx/bCk4AhRh2eI9luHOQM86nZsqwiRkkq2GekHXBBD+SmPidc8i2PqtYZl+pWJ8Oeg==
+  dependencies:
+    truncate-utf8-bytes "^1.0.0"
+
 semver@^5.5.0:
   version "5.7.2"
   resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.2.tgz#48d55db737c3287cd4835e17fa13feace1c41ef8"
@@ -540,6 +547,13 @@ tar-stream@^2.1.4:
     inherits "^2.0.3"
     readable-stream "^3.1.1"
 
+truncate-utf8-bytes@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz#405923909592d56f78a5818434b0b78489ca5f2b"
+  integrity sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==
+  dependencies:
+    utf8-byte-length "^1.0.1"
+
 tunnel-agent@^0.6.0:
   version "0.6.0"
   resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
@@ -547,6 +561,11 @@ tunnel-agent@^0.6.0:
   dependencies:
     safe-buffer "^5.0.1"
 
+utf8-byte-length@^1.0.1:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.5.tgz#f9f63910d15536ee2b2d5dd4665389715eac5c1e"
+  integrity sha512-Xn0w3MtiQ6zoz2vFyUVruaCL53O/DwUvkEeOvj+uulMm0BkUGYWmBYVyElqZaSLhY6ZD0ulfU3aBra2aVT4xfA==
+
 util-deprecate@^1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"

+ 1 - 1
deps/outliner/deps.edn

@@ -6,7 +6,7 @@
 
   ;; Any other deps should be added here and to nbb.edn
   logseq/db             {:local/root "../db"}
-  logseq/graph-parser   {:local/root "../db"}
+  logseq/graph-parser   {:local/root "../graph-parser"}
   metosin/malli {:mvn/version "0.16.1"}}
  :aliases
  {:clj-kondo

+ 9 - 8
src/main/frontend/components/imports.cljs

@@ -351,26 +351,27 @@
 (defn- read-asset [file assets]
   (-> (.arrayBuffer (:file-object file))
       (p/then (fn [buffer]
-                (p/let [checksum (db-asset/<get-file-array-buffer-checksum buffer)]
+                (p/let [checksum (db-asset/<get-file-array-buffer-checksum buffer)
+                        byte-array (js/Uint8Array. buffer)]
                   (swap! assets assoc
                          (gp-exporter/asset-path->name (:path file))
                          {:size (.-size (:file-object file))
                           :checksum checksum
                           :type (db-asset/asset-path->type (:path file))
                           :path (:path file)
-                          ;; Save buffer to avoid reading asset twice
-                          ::array-buffer buffer}))))))
+                          ;; Save array to avoid reading asset twice
+                          ::byte-array byte-array})
+                  byte-array)))))
 
 (defn- copy-asset [repo repo-dir asset-m]
-  (-> (::array-buffer asset-m)
-      (p/then (fn [buffer]
-                (let [content (js/Uint8Array. buffer)
-                      assets-dir (path/path-join repo-dir common-config/local-assets-dir)]
+  (-> (::byte-array asset-m)
+      (p/then (fn [content]
+                (let [assets-dir (path/path-join repo-dir common-config/local-assets-dir)]
                   (p/do!
                    (fs/mkdir-if-not-exists assets-dir)
                    (if (:block/uuid asset-m)
                      (fs/write-plain-text-file! repo assets-dir (str (:block/uuid asset-m) "." (:type asset-m)) content {:skip-transact? true})
-                     (do
+                     (when-not (:pdf-annotation? asset-m)
                        (println "Copied asset" (pr-str (node-path/basename (:path asset-m)))
                                 "by its name since it was unused.")
                        (fs/write-plain-text-file! repo assets-dir (node-path/basename (:path asset-m)) content {:skip-transact? true})))))))))

+ 2 - 1
src/main/frontend/extensions/pdf/assets.cljs

@@ -25,6 +25,7 @@
             [frontend.util.ref :as ref]
             [logseq.common.config :as common-config]
             [logseq.common.path :as path]
+            [logseq.graph-parser.exporter :as gp-exporter]
             [logseq.publishing.db :as publish-db]
             [medley.core :as medley]
             [promesa.core :as p]
@@ -52,7 +53,7 @@
                       (some-> url (js/decodeURIComponent)
                               (get-in-repo-assets-full-filename)
                               (string/replace '"/" "_")))
-        filekey   (util/safe-sanitize-file-name
+        filekey   (gp-exporter/safe-sanitize-file-name
                    (subs filename' 0 (- (count filename') (inc (count ext-name)))))]
     (when-let [key (and (not (string/blank? filekey))
                         (if web-link?