浏览代码

enhance: validate db once for an import

and notify if there are invalid blocks. Removed validating per tx during
import which speeds up import 5-10% on small graphs and more on larger
graphs. Also print some useful stats in the console
Gabriel Horner 1 年之前
父节点
当前提交
f28cf73ef4

+ 1 - 0
.clj-kondo/config.edn

@@ -142,6 +142,7 @@
              logseq.db.frontend.property.util db-property-util
              logseq.db.frontend.rules rules
              logseq.db.frontend.schema db-schema
+             logseq.db.frontend.validate db-validate
              logseq.db.sqlite.db sqlite-db
              logseq.db.sqlite.util sqlite-util
              logseq.graph-parser graph-parser

+ 4 - 39
deps/db/script/validate_client_db.cljs

@@ -3,62 +3,27 @@
    NOTE: This script is also used in CI to confirm our db's schema is up to date"
   (:require [logseq.db.sqlite.db :as sqlite-db]
             [logseq.db.frontend.malli-schema :as db-malli-schema]
+            [logseq.db.frontend.validate :as db-validate]
             [datascript.core :as d]
             [clojure.string :as string]
             [nbb.core :as nbb]
             [malli.core :as m]
-            [malli.util :as mu]
             [babashka.cli :as cli]
             ["path" :as node-path]
             ["os" :as os]
             [cljs.pprint :as pprint]))
 
-(defn- build-grouped-errors [db full-maps errors]
-  (->> errors
-       (group-by #(-> % :in first))
-       (map (fn [[idx errors']]
-              {:entity (cond-> (get full-maps idx)
-                         ;; Provide additional page info for debugging
-                         (:block/page (get full-maps idx))
-                         (update :block/page
-                                 (fn [id] (select-keys (d/entity db id)
-                                                       [:block/name :block/type :db/id :block/created-at]))))
-               ;; Group by type to reduce verbosity
-               :errors-by-type
-               (->> (group-by :type errors')
-                    (map (fn [[type' type-errors]]
-                           [type'
-                            {:in-value-distinct (->> type-errors
-                                                     (map #(select-keys % [:in :value]))
-                                                     distinct
-                                                     vec)
-                             :schema-distinct (->> (map :schema type-errors)
-                                                   (map m/form)
-                                                   distinct
-                                                   vec)}]))
-                    (into {}))}))))
-
-(defn- update-schema
-  "Updates the db schema to add a datascript db for property validations
-   and to optionally close maps"
-  [db-schema db {:keys [closed-maps]}]
-  (cond-> db-schema
-    true
-    (db-malli-schema/update-properties-in-schema db)
-    closed-maps
-    mu/closed-schema))
-
 (defn validate-client-db
   "Validate datascript db as a vec of entity maps"
-  [db ent-maps* {:keys [verbose group-errors] :as options}]
+  [db ent-maps* {:keys [verbose group-errors closed-maps]}]
   (let [ent-maps (vec (db-malli-schema/update-properties-in-ents (vals ent-maps*)))
-        schema (update-schema db-malli-schema/DB db options)]
+        schema (db-validate/update-schema db-malli-schema/DB db {:closed-schema? closed-maps})]
     (if-let [errors (->> ent-maps
                          (m/explain schema)
                          :errors)]
       (do
         (if group-errors
-          (let [ent-errors (build-grouped-errors db ent-maps errors)]
+          (let [ent-errors (db-validate/group-errors-by-entity db ent-maps errors)]
             (println "Found" (count ent-errors) "entities in errors:")
             (if verbose
               (pprint/pprint ent-errors)

+ 58 - 11
deps/db/src/logseq/db/frontend/validate.cljs

@@ -1,24 +1,29 @@
 (ns logseq.db.frontend.validate
-  "Validate frontend db"
+  "Validate frontend db for DB graphs"
   (:require [datascript.core :as d]
             [logseq.db.frontend.malli-schema :as db-malli-schema]
             [malli.util :as mu]
             [malli.core :as m]
             [cljs.pprint :as pprint]))
 
-(defn validate-db!
-  "Validates the entities that have changed in the given datascript tx-report.
-   Validation is only for DB graphs. Returns boolean indicating if db is valid"
+(defn update-schema
+  "Updates the db schema to add a datascript db for property validations
+   and to optionally close maps"
+  [db-schema db {:keys [closed-schema?]}]
+  (cond-> db-schema
+    true
+    (db-malli-schema/update-properties-in-schema db)
+    closed-schema?
+    mu/closed-schema))
+
+(defn validate-tx-report!
+  "Validates the datascript tx-report for entities that have changed. Returns
+  boolean indicating if db is valid"
   [{:keys [db-after tx-data tx-meta]} validate-options]
-  (let [{:keys [closed-schema?]} validate-options
-        changed-ids (->> tx-data (map :e) distinct)
+  (let [changed-ids (->> tx-data (map :e) distinct)
         ent-maps* (->> changed-ids (mapcat #(d/datoms db-after :eavt %)) db-malli-schema/datoms->entity-maps vals)
         ent-maps (vec (db-malli-schema/update-properties-in-ents ent-maps*))
-        db-schema (cond-> db-malli-schema/DB
-                    true
-                    (db-malli-schema/update-properties-in-schema db-after)
-                    closed-schema?
-                    mu/closed-schema)]
+        db-schema (update-schema db-malli-schema/DB db-after validate-options)]
     (js/console.log "changed eids:" changed-ids tx-meta)
     (if-let [errors (->> ent-maps
                          (m/explain db-schema)
@@ -28,3 +33,45 @@
           (pprint/pprint {:entity-maps ent-maps})
           false)
       true)))
+
+(defn group-errors-by-entity
+  "Groups malli errors by entities. db is used for providing more debugging info"
+  [db ent-maps errors]
+  (->> errors
+       (group-by #(-> % :in first))
+       (map (fn [[idx errors']]
+              {:entity (cond-> (get ent-maps idx)
+                         ;; Provide additional page info for debugging
+                         (:block/page (get ent-maps idx))
+                         (update :block/page
+                                 (fn [id] (select-keys (d/entity db id)
+                                                       [:block/name :block/type :db/id :block/created-at]))))
+               ;; Group by type to reduce verbosity
+               :errors-by-type
+               (->> (group-by :type errors')
+                    (map (fn [[type' type-errors]]
+                           [type'
+                            {:in-value-distinct (->> type-errors
+                                                     (map #(select-keys % [:in :value]))
+                                                     distinct
+                                                     vec)
+                             :schema-distinct (->> (map :schema type-errors)
+                                                   (map m/form)
+                                                   distinct
+                                                   vec)}]))
+                    (into {}))}))))
+
+(defn validate-db!
+  "Validates all the entities of the given db using :eavt datoms. Returns a map
+  with info about db being validated. If there are errors, they are placed on
+  :errors and grouped by entity"
+  [db]
+  (let [datoms (d/datoms db :eavt)
+        ent-maps* (db-malli-schema/datoms->entity-maps datoms)
+        ent-maps (vec (db-malli-schema/update-properties-in-ents (vals ent-maps*)))
+        schema (update-schema db-malli-schema/DB db {:closed-schema? true})
+        errors (->> ent-maps (m/explain schema) :errors)]
+    (cond-> {:datom-count (count datoms)
+             :entities ent-maps}
+      (some? errors)
+      (assoc :errors (group-errors-by-entity db ent-maps errors)))))

+ 29 - 5
src/main/frontend/components/imports.cljs

@@ -5,6 +5,7 @@
             [clojure.edn :as edn]
             [clojure.string :as string]
             [cljs-time.core :as t]
+            [cljs.pprint :as pprint]
             [datascript.core :as d]
             [frontend.components.onboarding.setups :as setups]
             [frontend.components.repo :as repo]
@@ -37,7 +38,8 @@
             [rum.core :as rum]
             [logseq.common.config :as common-config]
             [lambdaisland.glogi :as log]
-            [frontend.handler.db-based.property.util :as db-pu]))
+            [frontend.handler.db-based.property.util :as db-pu]
+            [logseq.db.frontend.validate :as db-validate]))
 
 ;; Can't name this component as `frontend.components.import` since shadow-cljs
 ;; will complain about it.
@@ -338,6 +340,31 @@
       (ui/button "Submit"
                  {:on-click on-submit})]]))
 
+(defn- counts-from-entities
+  [entities]
+  {:entities (count entities)
+   :pages (count (filter :block/name entities))
+   :blocks (count (filter :block/content entities))
+   :classes (count (filter #(contains? (:block/type %) "class") entities))
+   :properties (count (filter #(contains? (:block/type %) "property") entities))
+   :property-values (count (mapcat :block/properties entities))})
+
+(defn- validate-imported-data
+  [db files]
+  (when-let [org-files (seq (filter #(= "org" (path/file-ext (:rpath %))) files))]
+    (log/info :org-files (mapv :rpath org-files))
+    (notification/show! (str "Imported " (count org-files) " org file(s) as markdown. Support for org files will be added later.")
+                        :info false))
+  (let [{:keys [errors datom-count entities]} (db-validate/validate-db! db)]
+    (when errors
+      (log/error :import-errors {:msg (str "Import detected " (count errors) " invalid block(s):")
+                                 :counts (assoc (counts-from-entities entities) :datoms datom-count)})
+      (pprint/pprint (map :entity errors))
+      (notification/show! (str "Import detected " (count errors) " invalid block(s). These blocks may be buggy when you interact with them. See the javascript console for more.")
+                          :warning false))
+    (log/info :import-valid {:msg "Valid import!"
+                             :counts (assoc (counts-from-entities entities) :datoms datom-count)})))
+
 (defn- import-file-graph
   [*files {:keys [graph-name tags]} config-file]
   (state/set-state! :graph/importing :file-graph)
@@ -362,10 +389,7 @@
       (log/info :import-file-graph {:msg (str "Import finished in " (/ (t/in-millis (t/interval start-time (t/now))) 1000) " seconds")})
       (state/set-state! :graph/importing nil)
       (state/set-state! :graph/importing-state nil)
-      (when-let [org-files (seq (filter #(= "org" (path/file-ext (:rpath %))) files))]
-        (log/info :org-files (mapv :rpath org-files))
-        (notification/show! (str "Imported " (count org-files) " org file(s) as markdown. Support for org files will be added later")
-                            :info false))
+      (validate-imported-data @db-conn files)
       (finished-cb))))
 
 (defn import-file-to-db-handler

+ 3 - 3
src/main/frontend/worker/pipeline.cljs

@@ -6,7 +6,7 @@
             [frontend.worker.react :as worker-react]
             [frontend.worker.file :as file]
             [frontend.worker.util :as worker-util]
-            [logseq.db.frontend.validate :as validate]
+            [logseq.db.frontend.validate :as db-validate]
             [logseq.db.sqlite.util :as sqlite-util]
             [frontend.worker.db.fix :as db-fix]
             [logseq.db :as ldb]))
@@ -62,8 +62,8 @@
 
 (defn validate-and-fix-db!
   [repo conn tx-report context]
-  (when (and (:dev? context) (sqlite-util/db-based-graph? repo))
-    (let [valid? (validate/validate-db! tx-report (:validate-db-options context))]
+  (when (and (:dev? context) (not (:importing? context)) (sqlite-util/db-based-graph? repo))
+    (let [valid? (db-validate/validate-tx-report! tx-report (:validate-db-options context))]
       (when (and (get-in context [:validate-db-options :fail-invalid?]) (not valid?))
         (worker-util/post-message :notification
                                   (pr-str [["Invalid DB!"] :error])))))