Prechádzať zdrojové kódy

fix: some pdf annotations weren't imported

Creation of edn+md paths from the pdf path was lacking sanitization, which
resulted in edn and md files not being associated with the pdf.

Fixes https://github.com/logseq/db-test/issues/196#issuecomment-3059363947.
Also fix incorrect logseq/graph-parser dep path in deps/outliner/deps.edn
Gabriel Horner 4 mesiacov pred
rodič
commit
5de565dcce

+ 2 - 1
deps/graph-parser/package.json

@@ -7,7 +7,8 @@
     "better-sqlite3": "11.10.0"
   },
   "dependencies": {
-    "mldoc": "^1.5.9"
+    "mldoc": "^1.5.9",
+    "sanitize-filename": "1.6.3"
   },
   "scripts": {
     "test": "nbb-logseq -cp test:../outliner/src -m nextjournal.test-runner",

+ 7 - 3
deps/graph-parser/src/logseq/graph_parser/exporter.cljs

@@ -36,7 +36,8 @@
             [logseq.graph-parser.extract :as extract]
             [logseq.graph-parser.property :as gp-property]
             [logseq.graph-parser.utf8 :as utf8]
-            [promesa.core :as p]))
+            [promesa.core :as p]
+            [logseq.graph-parser.text :as text]))
 
 (defn- add-missing-timestamps
   "Add updated-at or created-at timestamps if they doesn't exist"
@@ -1114,8 +1115,11 @@
 (defn- build-pdf-annotations-tx
   "Builds tx for pdf annotations when a pdf has an annotations EDN file under assets/"
   [parent-asset-path assets parent-asset pdf-annotation-pages opts]
-  (let [asset-edn-path (string/replace-first parent-asset-path #"(?i)\.pdf$" ".edn")
-        asset-md-name (str "hls__" (node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".md")))]
+  (let [asset-edn-path (node-path/join common-config/local-assets-dir
+                                       (text/safe-sanitize-file-name
+                                        (node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".edn"))))
+        asset-md-name (str "hls__" (text/safe-sanitize-file-name
+                                    (node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".md"))))]
     (when-let [asset-edn-map (get @assets asset-edn-path)]
       ;; Mark edn asset so it isn't treated like a normal asset later
       (swap! assets assoc-in [asset-edn-path :pdf-annotation?] true)

+ 14 - 5
deps/graph-parser/src/logseq/graph_parser/text.cljs

@@ -1,13 +1,14 @@
 (ns logseq.graph-parser.text
   "Miscellaneous text util fns for the parser. Used by file and DB graphs"
-  (:require [goog.string :as gstring]
-            [clojure.string :as string]
+  (:require ["sanitize-filename" :as sanitizeFilename]
             [clojure.set :as set]
-            [logseq.graph-parser.property :as gp-property]
-            [logseq.graph-parser.mldoc :as gp-mldoc]
+            [clojure.string :as string]
+            [goog.string :as gstring]
             [logseq.common.util :as common-util]
+            [logseq.common.util.namespace :as ns-util]
             [logseq.common.util.page-ref :as page-ref]
-            [logseq.common.util.namespace :as ns-util]))
+            [logseq.graph-parser.mldoc :as gp-mldoc]
+            [logseq.graph-parser.property :as gp-property]))
 
 (def get-file-basename page-ref/get-file-basename)
 
@@ -152,3 +153,11 @@
 
 (def namespace-page? ns-util/namespace-page?)
 (def get-namespace-last-part ns-util/get-last-part)
+
+;; Reference same default class in cljs + nbb without needing .cljc
+(def sanitizeFilename' (if (find-ns 'nbb.core) (aget sanitizeFilename "default") sanitizeFilename))
+
+(defn safe-sanitize-file-name
+  "Sanitizes filenames for pdf assets"
+  [s]
+  (sanitizeFilename' (str s)))

+ 19 - 0
deps/graph-parser/yarn.lock

@@ -425,6 +425,13 @@ safe-buffer@^5.0.1, safe-buffer@~5.2.0:
   resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
   integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
 
+sanitize-filename@1.6.3:
+  version "1.6.3"
+  resolved "https://registry.yarnpkg.com/sanitize-filename/-/sanitize-filename-1.6.3.tgz#755ebd752045931977e30b2025d340d7c9090378"
+  integrity sha512-y/52Mcy7aw3gRm7IrcGDFx/bCk4AhRh2eI9luHOQM86nZsqwiRkkq2GekHXBBD+SmPidc8i2PqtYZl+pWJ8Oeg==
+  dependencies:
+    truncate-utf8-bytes "^1.0.0"
+
 semver@^5.5.0:
   version "5.7.2"
   resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.2.tgz#48d55db737c3287cd4835e17fa13feace1c41ef8"
@@ -540,6 +547,13 @@ tar-stream@^2.1.4:
     inherits "^2.0.3"
     readable-stream "^3.1.1"
 
+truncate-utf8-bytes@^1.0.0:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz#405923909592d56f78a5818434b0b78489ca5f2b"
+  integrity sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==
+  dependencies:
+    utf8-byte-length "^1.0.1"
+
 tunnel-agent@^0.6.0:
   version "0.6.0"
   resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
@@ -547,6 +561,11 @@ tunnel-agent@^0.6.0:
   dependencies:
     safe-buffer "^5.0.1"
 
+utf8-byte-length@^1.0.1:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.5.tgz#f9f63910d15536ee2b2d5dd4665389715eac5c1e"
+  integrity sha512-Xn0w3MtiQ6zoz2vFyUVruaCL53O/DwUvkEeOvj+uulMm0BkUGYWmBYVyElqZaSLhY6ZD0ulfU3aBra2aVT4xfA==
+
 util-deprecate@^1.0.1:
   version "1.0.2"
   resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"

+ 1 - 1
deps/outliner/deps.edn

@@ -6,7 +6,7 @@
 
   ;; Any other deps should be added here and to nbb.edn
   logseq/db             {:local/root "../db"}
-  logseq/graph-parser   {:local/root "../db"}
+  logseq/graph-parser   {:local/root "../graph-parser"}
   metosin/malli {:mvn/version "0.16.1"}}
  :aliases
  {:clj-kondo

+ 3 - 2
src/main/frontend/extensions/pdf/assets.cljs

@@ -29,7 +29,8 @@
             [medley.core :as medley]
             [promesa.core :as p]
             [reitit.frontend.easy :as rfe]
-            [rum.core :as rum]))
+            [rum.core :as rum]
+            [logseq.graph-parser.text :as text]))
 
 (defn get-in-repo-assets-full-filename
   [url]
@@ -52,7 +53,7 @@
                       (some-> url (js/decodeURIComponent)
                               (get-in-repo-assets-full-filename)
                               (string/replace '"/" "_")))
-        filekey   (util/safe-sanitize-file-name
+        filekey   (text/safe-sanitize-file-name
                    (subs filename' 0 (- (count filename') (inc (count ext-name)))))]
     (when-let [key (and (not (string/blank? filekey))
                         (if web-link?