Browse Source

enhance: add script to build large graphs with configurable sizes

Gabriel Horner 1 year ago
parent
commit
42c1785bb8

+ 19 - 0
scripts/README.md

@@ -31,6 +31,25 @@ properties. Read the docs in
 [logseq.tasks.db-graph.create-graph](src/logseq/tasks/db_graph/create_graph.cljs)
 [logseq.tasks.db-graph.create-graph](src/logseq/tasks/db_graph/create_graph.cljs)
 for specifics on the EDN map.
 for specifics on the EDN map.
 
 
+To create large graphs with varying size:
+
+```
+$ yarn -s nbb-logseq src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs large
+Building tx ...
+Built 21000 tx, 1000 pages and 20000 blocks ...
+Transacting chunk 1 of 21 starting with block: #:block{:name "page-0"}
+...
+Created graph large with 187810 datoms!
+
+# To see options available
+$ yarn -s nbb-logseq src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs -h
+Usage: $0 GRAPH-NAME [OPTIONS]
+Options:
+  -h, --help        Print help
+  -p, --pages  1000 Number of pages to create
+  -b, --blocks 20   Number of blocks to create
+```
+
 Another example is the `create_graph_with_schema_org.cljs` script which creates a graph
 Another example is the `create_graph_with_schema_org.cljs` script which creates a graph
 with the https://schema.org/ ontology with as many of the classes and properties as possible:
 with the https://schema.org/ ontology with as many of the classes and properties as possible:
 
 

+ 84 - 0
scripts/src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs

@@ -0,0 +1,84 @@
+(ns logseq.tasks.db-graph.create-graph-with-large-sizes
+  "Script that generates graphs at large sizes"
+  (:require [logseq.tasks.db-graph.create-graph :as create-graph]
+            [clojure.string :as string]
+            [datascript.core :as d]
+            [babashka.cli :as cli]
+            ["path" :as node-path]
+            ["os" :as os]
+            [nbb.core :as nbb]))
+
+(def *ids (atom #{}))
+(defn get-next-id
+  []
+  (let [id (random-uuid)]
+    (if (@*ids id)
+      (get-next-id)
+      (do
+        (swap! *ids conj id)
+        id))))
+
+(defn build-pages
+  [start-idx n]
+  (let [ids (repeatedly n get-next-id)]
+    (map-indexed
+     (fn [idx id]
+       {:block/uuid id
+        :block/name (str "page-" (+ start-idx idx))})
+     ids)))
+
+(defn build-blocks
+  [size]
+  (vec (repeatedly size
+                   (fn []
+                     (let [id (get-next-id)]
+                       {:block/uuid id
+                        :block/content (str id)})))))
+
+(defn- create-init-data
+  [options]
+  (let [pages (build-pages 0 (:pages options))]
+    {:pages-and-blocks
+     (mapv #(hash-map :page % :blocks (build-blocks (:blocks options)))
+           pages)}))
+
+(def spec
+  "Options spec"
+  {:help {:alias :h
+          :desc "Print help"}
+   :pages {:alias :p
+           :default 1000
+           :desc "Number of pages to create"}
+   :blocks {:alias :b
+            :default 20
+            :desc "Number of blocks to create"}})
+
+(defn -main [args]
+  (let [graph-dir (first args)
+        options (cli/parse-opts args {:spec spec})
+        _ (when (or (nil? graph-dir) (:help options))
+            (println (str "Usage: $0 GRAPH-NAME [OPTIONS]\nOptions:\n"
+                          (cli/format-opts {:spec spec})))
+            (js/process.exit 1))
+        [dir db-name] (if (string/includes? graph-dir "/")
+                        ((juxt node-path/dirname node-path/basename) graph-dir)
+                        [(node-path/join (os/homedir) "logseq" "graphs") graph-dir])
+        conn (create-graph/init-conn dir db-name)
+        _ (println "Building tx ...")
+        blocks-tx (create-graph/create-blocks-tx (create-init-data options))]
+    (println "Built" (count blocks-tx) "tx," (count (filter :block/name blocks-tx)) "pages and"
+             (count (filter :block/content blocks-tx)) "blocks ...")
+    ;; Vary the chunking with page size for now
+    (let [tx-chunks (partition-all (:pages options) blocks-tx)]
+      (loop [chunks tx-chunks
+             chunk-num 1]
+        (when-let [chunk (first chunks)]
+          (println "Transacting chunk" chunk-num  "of" (count tx-chunks)
+                   "starting with block:" (pr-str (select-keys (first chunk) [:block/content :block/name])))
+          (d/transact! conn chunk)
+          (recur (rest chunks) (inc chunk-num)))))
+    #_(d/transact! conn blocks-tx)
+    (println "Created graph" (str db-name " with " (count (d/datoms @conn :eavt)) " datoms!"))))
+
+(when (= nbb/*file* (:file (meta #'-main)))
+  (-main *command-line-args*))