浏览代码

cli/parse-graph returns ast data as well

Previously the AST data was discarded after parsing. It is now returned
because it is valuable for CI jobs and possibly other functionality.
Gabriel Horner 3 年之前
父节点
当前提交
69ba15252e

+ 2 - 1
deps/graph-parser/.clj-kondo/config.edn

@@ -1,6 +1,7 @@
 {:linters
  {:consistent-alias
-  {:aliases {datascript.core d
+  {:aliases {clojure.string string
+             datascript.core d
              logseq.graph-parser graph-parser
              logseq.graph-parser.text text
              logseq.graph-parser.block gp-block

+ 10 - 6
deps/graph-parser/src/logseq/graph_parser.cljs

@@ -24,14 +24,15 @@
   (db-set-file-content! conn file content)
   (let [format (gp-util/get-format file)
         file-content [{:file/path file}]
-        tx (if (contains? gp-config/mldoc-support-formats format)
+        {:keys [tx ast]}
+        (if (contains? gp-config/mldoc-support-formats format)
              (let [extract-options' (merge {:block-pattern (gp-config/get-block-pattern format)
                                             :date-formatter "MMM do, yyyy"
                                             :supported-formats (gp-config/supported-formats)}
                                            extract-options
                                            {:db @conn})
-                   [pages blocks]
-                   (extract/extract-blocks-pages file content extract-options')
+                   {:keys [pages blocks ast]}
+                   (extract/extract file content extract-options')
                    delete-blocks (delete-blocks-fn (first pages) file)
                    block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
                    block-refs-ids (->> (mapcat :block/refs blocks)
@@ -44,13 +45,16 @@
                    pages (extract/with-ref-pages pages blocks)
                    pages-index (map #(select-keys % [:block/name]) pages)]
                ;; does order matter?
-               (concat file-content pages-index delete-blocks pages block-ids blocks))
-             file-content)
+               {:tx (concat file-content pages-index delete-blocks pages block-ids blocks)
+                :ast ast})
+             {:tx file-content})
         tx (concat tx [(cond-> {:file/path file}
                                new?
                                ;; TODO: use file system timestamp?
                                (assoc :file/created-at (date-time-util/time-ms)))])]
-    (d/transact! conn (gp-util/remove-nils tx) (select-keys options [:new-graph? :from-disk?]))))
+    {:tx
+     (d/transact! conn (gp-util/remove-nils tx) (select-keys options [:new-graph? :from-disk?]))
+     :ast ast}))
 
 (defn filter-files
   "Filters files in preparation for parsing. Only includes files that are

+ 11 - 6
deps/graph-parser/src/logseq/graph_parser/cli.cljs

@@ -46,8 +46,12 @@ TODO: Fail fast when process exits 1"
   [conn files {:keys [config] :as options}]
   (let [extract-options (merge {:date-formatter (gp-config/get-date-formatter config)}
                                (select-keys options [:verbose]))]
-    (doseq [{:file/keys [path content]} files]
-      (graph-parser/parse-file conn path content {:extract-options extract-options}))))
+    (mapv
+     (fn [{:file/keys [path content]}]
+       (let [{:keys [ast]}
+             (graph-parser/parse-file conn path content {:extract-options extract-options})]
+         {:file path :ast ast}))
+     files)))
 
 (defn parse-graph
   "Parses a given graph directory and returns a datascript connection and all
@@ -61,8 +65,9 @@ TODO: Fail fast when process exits 1"
   ([dir options]
    (let [files (or (:files options) (build-graph-files dir))
          conn (ldb/start-conn)
-         config (read-config dir)]
-     (when-not (:files options) (println "Parsing" (count files) "files..."))
-     (parse-files conn files (merge options {:config config}))
+         config (read-config dir)
+        _ (when-not (:files options) (println "Parsing" (count files) "files..."))
+         asts (parse-files conn files (merge options {:config config}))]
      {:conn conn
-      :files (map :file/path files)})))
+      :files (map :file/path files)
+      :asts asts})))

+ 10 - 8
deps/graph-parser/src/logseq/graph_parser/extract.cljc

@@ -138,16 +138,17 @@
     (catch :default e
       (log/error :exception e))))
 
-(defn extract-blocks-pages
+(defn extract
+  "Extracts pages, blocks and ast from given file"
   [file content {:keys [user-config verbose] :or {verbose true} :as options}]
   (if (string/blank? content)
     []
     (let [format (gp-util/get-format file)
           _ (when verbose (println "Parsing start: " file))
           ast (gp-mldoc/->edn content (gp-mldoc/default-config format
-                                                         ;; {:parse_outline_only? true}
-                                                         )
-                           user-config)]
+                                        ;; {:parse_outline_only? true}
+                                        )
+                              user-config)]
       (when verbose (println "Parsing finished: " file))
       (let [first-block (ffirst ast)
             properties (let [properties (and (gp-property/properties-ast? first-block)
@@ -165,10 +166,11 @@
                              (update properties :filters
                                      (fn [v]
                                        (string/replace (or v "") "\\" "")))
-                             properties)))]
-        (extract-pages-and-blocks
-         format ast properties
-         file content options)))))
+                             properties)))
+            [pages blocks] (extract-pages-and-blocks format ast properties file content options)]
+        {:pages pages
+         :blocks blocks
+         :ast ast}))))
 
 (defn- with-block-uuid
   [pages]

+ 17 - 5
deps/graph-parser/test/logseq/graph_parser/cli_test.cljs

@@ -1,13 +1,25 @@
 (ns logseq.graph-parser.cli-test
-  (:require [cljs.test :refer [deftest]]
+  (:require [cljs.test :refer [deftest is testing]]
             [logseq.graph-parser.cli :as gp-cli]
-            [logseq.graph-parser.test.docs-graph-helper :as docs-graph-helper]))
+            [logseq.graph-parser.test.docs-graph-helper :as docs-graph-helper]
+            [clojure.string :as string]))
 
 ;; Integration test that tests parsing a large graph like docs
 (deftest ^:integration parse-graph
   (let [graph-dir "test/docs"
         _ (docs-graph-helper/clone-docs-repo-if-not-exists graph-dir)
-        {:keys [conn files]} (gp-cli/parse-graph graph-dir)
-        db @conn]
+        {:keys [conn files asts]} (gp-cli/parse-graph graph-dir)]
 
-    (docs-graph-helper/docs-graph-assertions db files)))
+    (docs-graph-helper/docs-graph-assertions @conn files)
+
+    (testing "Asts"
+      (is (seq asts) "Asts returned are non-zero")
+      (is (= files (map :file asts))
+          "There's an ast returned for every file processed")
+      (is (empty? (remove #(or
+                            (seq (:ast %))
+                            ;; logseq files don't have ast
+                            ;; could also use gp-config but API isn't public yet
+                            (string/includes? (:file %) (str graph-dir "/logseq/")))
+                          asts))
+          "Parsed files shouldn't have empty asts"))))

+ 5 - 6
deps/graph-parser/test/logseq/graph_parser/extract_test.cljs

@@ -5,16 +5,15 @@
 
 (defn- extract
   [text]
-  (let [result (extract/extract-blocks-pages "a.md" text {:block-pattern "-"})
-          result (last result)
-          lefts (map (juxt :block/parent :block/left) result)]
+  (let [{:keys [blocks]} (extract/extract "a.md" text {:block-pattern "-"})
+          lefts (map (juxt :block/parent :block/left) blocks)]
     (if (not= (count lefts) (count (distinct lefts)))
       (do
-        (pprint/pprint (map (fn [x] (select-keys x [:block/uuid :block/level :block/content :block/left])) result))
+        (pprint/pprint (map (fn [x] (select-keys x [:block/uuid :block/level :block/content :block/left])) blocks))
         (throw (js/Error. ":block/parent && :block/left conflicts")))
-      (mapv :block/content result))))
+      (mapv :block/content blocks))))
 
-(deftest test-extract-blocks-pages
+(deftest test-extract
   []
   (is (= ["a" "b" "c"]
          (extract

+ 13 - 12
src/main/frontend/handler/file.cljs

@@ -130,18 +130,19 @@
                 file)
          file (gp-util/path-normalize file)
          new? (nil? (db/entity [:file/path file]))]
-     (graph-parser/parse-file
-      (db/get-db repo-url false)
-      file
-      content
-      (merge options
-             {:new? new?
-              :delete-blocks-fn (partial get-delete-blocks repo-url)
-              :extract-options {:user-config (state/get-config)
-                                :date-formatter (state/get-date-formatter)
-                                :page-name-order (state/page-name-order)
-                                :block-pattern (config/get-block-pattern (gp-util/get-format file))
-                                :supported-formats (gp-config/supported-formats)}})))))
+     (:tx
+      (graph-parser/parse-file
+       (db/get-db repo-url false)
+       file
+       content
+       (merge options
+              {:new? new?
+               :delete-blocks-fn (partial get-delete-blocks repo-url)
+               :extract-options {:user-config (state/get-config)
+                                 :date-formatter (state/get-date-formatter)
+                                 :page-name-order (state/page-name-order)
+                                 :block-pattern (config/get-block-pattern (gp-util/get-format file))
+                                 :supported-formats (gp-config/supported-formats)}}))))))
 
 ;; TODO: Remove this function in favor of `alter-files`
 (defn alter-file