Răsfoiți Sursa

add gc walk addresses

Tienson Qin 7 luni în urmă
părinte
comite
524260f6ec

+ 41 - 7
deps/db/src/logseq/db/sqlite/gc.cljs

@@ -1,9 +1,20 @@
 (ns logseq.db.sqlite.gc
   "GC unused addresses from `kvs` table"
   (:require [cljs-bean.core :as bean]
-            [clojure.set]
+            [clojure.set :as set]
             [logseq.db.sqlite.util :as sqlite-util]))
 
+(defn walk-addresses
+  "Returns the set of all addresses reachable from `root`, including `root`
+   itself.
+   - `root`: the address to start walking from.
+   - `addr->children`: a function (typically a map) from an address to a
+     collection of its child addresses; `nil` means the address has no children.
+   Prints the root and the elapsed walk time as debug output."
+  [root addr->children]
+  (println :debug :walk-addresses :root root)
+  (time
+   ;; Iterative depth-first walk with an explicit stack and a visited set:
+   ;; deep trees cannot overflow the call stack, a child shared by several
+   ;; parents is expanded only once, and an accidental cycle terminates
+   ;; instead of recursing forever.
+   (loop [pending [root]
+          visited #{}]
+     (if (empty? pending)
+       visited
+       (let [addr (peek pending)
+             remaining (pop pending)]
+         (if (contains? visited addr)
+           (recur remaining visited)
+           (recur (into remaining (addr->children addr))
+                  (conj visited addr))))))))
+
 (defonce get-non-refed-addrs-sql
   "WITH all_referenced AS (
      SELECT CAST(value AS INTEGER) AS addr
@@ -27,7 +38,7 @@
                                             :rowMode "array"})
                              (map first)
                              set)]
-    (clojure.set/difference non-refed-addrs internal-addrs)))
+    (set/difference non-refed-addrs internal-addrs)))
 
 (defn gc-kvs-table!
   "WASM version to GC kvs table to remove unused addresses"
@@ -54,12 +65,34 @@
                                bean/->clj
                                (map :addr)
                                (set)))]
-    (clojure.set/difference non-refed-addrs internal-addrs)))
+    (set/difference non-refed-addrs internal-addrs)))
+
+(defn- get-unused-addresses-node-walk-version
+  "Returns the set of `kvs` addresses that are not reachable by walking.
+   The content of the row at addr 0 is transit-decoded and its `:eavt`,
+   `:avet` and `:aevt` values are used as walk roots; every address reached
+   from them through the `addresses` column counts as used. Addresses 0 and 1
+   and the roots themselves are always treated as used."
+  [db]
+  (let [root-row (.get (.prepare db "select content from kvs where addr = ?") 0)
+        schema (sqlite-util/transit-read (.-content root-row))
+        index-roots #{(:eavt schema) (:avet schema) (:aevt schema)}
+        ;; One row per kvs entry; `addresses` is a JSON-encoded list of children.
+        rows (bean/->clj (.all ^object (.prepare db "select addr, addresses from kvs")))
+        parent->children (into {}
+                               (map (fn [{:keys [addr addresses]}]
+                                      [addr (bean/->clj (js/JSON.parse addresses))]))
+                               rows)
+        reachable (into #{}
+                        (mapcat #(walk-addresses % parent->children))
+                        index-roots)
+        used-addresses (set/union (conj index-roots 0 1) reachable)]
+    (set/difference (set (keys parent->children)) used-addresses)))
 
 (defn gc-kvs-table-node-version!
-  "Node version to GC kvs table to remove unused addresses"
-  [^Object db]
-  (let [unused-addresses (get-unused-addresses-node-version db)
+  "Node version to GC kvs table to remove unused addresses
+  `walk?` - `true`: walk all used addresses, `false`: gc recursively"
+  [^Object db walk?]
+  (let [unused-addresses (if walk?
+                           (get-unused-addresses-node-walk-version db)
+                           (get-unused-addresses-node-version db))
         addrs-count (let [stmt (.prepare db "select count(*) as c from kvs")]
                       (.-c (.get stmt)))]
     (println :debug "addrs total count: " addrs-count)
@@ -73,7 +106,8 @@
                         (doseq [addr addrs]
                           (.run stmt addr))))]
           (delete (bean/->js unused-addresses))
-          (gc-kvs-table-node-version! db)))
+          (when-not walk?
+            (gc-kvs-table-node-version! db false))))
       (println :debug :db-gc "There's no garbage data that's need to be collected."))))
 
 (defn ensure-no-garbage

+ 3 - 1
deps/db/test/logseq/db/sqlite/gc_test.cljs

@@ -37,7 +37,9 @@
                                 (take 100000)
                                 (map (fn [block] [:db/retractEntity [:block/uuid (:block/uuid block)]])))]
         (d/transact! conn non-ordered-tx))
-      (time (sqlite-gc/gc-kvs-table-node-version! sqlite))
+      (println "gc time")
+      ;; `true` to walk addresses and `false` to recursively run gc
+      (time (sqlite-gc/gc-kvs-table-node-version! sqlite false))
 
       ;; ensure there's no missing address (broken db)
       (is (empty? (sqlite-debug/find-missing-addresses-node-version sqlite))