浏览代码

enhance: add mldoc ast schema (#8829)

* enhance: add mldoc ast schema

* Add bb task to validate mldoc

Also move mldoc schema to make room for other schemas

---------

Co-authored-by: Gabriel Horner <[email protected]>
Co-authored-by: Tienson Qin <[email protected]>
rcmerci 2 年之前
父节点
当前提交
c3f7417682

+ 3 - 0
bb.edn

@@ -69,6 +69,9 @@
   dev:validate-global-config-edn
   dev:validate-global-config-edn
   logseq.tasks.malli/validate-global-config-edn
   logseq.tasks.malli/validate-global-config-edn
 
 
+  dev:validate-ast
+  logseq.tasks.malli/validate-ast
+
   dev:lint
   dev:lint
   logseq.tasks.dev/lint
   logseq.tasks.dev/lint
 
 

+ 2 - 0
deps/graph-parser/.carve/ignore

@@ -44,3 +44,5 @@ logseq.graph-parser.util/remove-nils
 logseq.graph-parser.text/get-file-basename
 logseq.graph-parser.text/get-file-basename
 ;; API
 ;; API
 logseq.graph-parser.mldoc/mldoc-link?
 logseq.graph-parser.mldoc/mldoc-link?
+;; public var
+logseq.graph-parser.schema.mldoc/block-ast-coll-schema

+ 3 - 1
deps/graph-parser/src/logseq/graph_parser/mldoc.cljc

@@ -13,7 +13,8 @@
             [logseq.graph-parser.utf8 :as utf8]
             [logseq.graph-parser.utf8 :as utf8]
             [clojure.string :as string]
             [clojure.string :as string]
             [logseq.graph-parser.util :as gp-util]
             [logseq.graph-parser.util :as gp-util]
-            [logseq.graph-parser.config :as gp-config]))
+            [logseq.graph-parser.config :as gp-config]
+            [logseq.graph-parser.schema.mldoc :as mldoc-schema]))
 
 
 (defonce parseJson (gobj/get Mldoc "parseJson"))
 (defonce parseJson (gobj/get Mldoc "parseJson"))
 (defonce parseInlineJson (gobj/get Mldoc "parseInlineJson"))
 (defonce parseInlineJson (gobj/get Mldoc "parseInlineJson"))
@@ -117,6 +118,7 @@
         original-ast))))
         original-ast))))
 
 
 (defn ->edn
 (defn ->edn
+  {:malli/schema [:=> [:cat :string :string] mldoc-schema/block-ast-with-pos-coll-schema]}
   [content config]
   [content config]
   (if (string? content)
   (if (string? content)
     (try
     (try

+ 220 - 0
deps/graph-parser/src/logseq/graph_parser/schema/mldoc.cljc

@@ -0,0 +1,220 @@
+(ns logseq.graph-parser.schema.mldoc
+  "Malli schema for mldoc AST")
+
+(defn- field-optional-and-maybe-nil
+  "Builds a malli map entry for key `k` that may be missing from the map and,
+  when present, may hold either nil or a value matching schema `v`."
+  [k v]
+  (let [entry-props {:optional true}
+        nilable-schema [:maybe v]]
+    [k entry-props nilable-schema]))
+
+;; Map with integer :start_pos and :end_pos entries. Used in this namespace
+;; for the "Src" block's :pos_meta and as the position paired with each block
+;; in block-ast-with-pos-coll-schema.
+(def pos-schema
+  [:map
+   [:start_pos :int]
+   [:end_pos :int]])
+
+;; Recursive schema for the payload of a "Nested_link" inline node: a map with
+;; a :content string and a :children sequence. Each child is either
+;; ["Label" <string>] or ["Nested_link" <nested-link>]; the local registry
+;; lets ::nested-link refer to itself.
+(def nested-link-schema
+  [:schema {:registry {::nested-link
+                       [:map
+                        [:content :string]
+                        [:children [:sequential [:or
+                                                 [:tuple [:= "Label"] :string]
+                                                 [:tuple [:= "Nested_link"] [:ref ::nested-link]]]]]]}}
+   ::nested-link])
+
+;; Timestamp map: required :date (integer :year/:month/:day), :wday string and
+;; :active boolean; :time (integer :hour/:min) and :repetition may be absent
+;; or nil.
+;; NOTE(review): :repetition is only constrained to :any here -- its real
+;; shape is not described by this schema.
+(def timestamp-schema
+  [:map
+   [:date [:map
+           [:year :int]
+           [:month :int]
+           [:day :int]]]
+   [:wday :string]
+   (field-optional-and-maybe-nil
+    :time [:map
+           [:hour :int]
+           [:min :int]])
+   (field-optional-and-maybe-nil
+    :repetition
+    :any)
+   [:active :boolean]])
+
+;; Pair of timestamps under :start and :stop. The ::timestamp refs are not
+;; resolvable on their own; they rely on the registry of inline-ast-schema,
+;; where this schema is registered as ::time-range.
+(def ^:private time-range-schema
+  [:map
+   [:start [:ref ::timestamp]]
+   [:stop [:ref ::timestamp]]])
+
+;; Payload of a "Link" inline node. :url is a tagged sequence whose first item
+;; names the link kind ("File", "Search", "Complex", "Page_ref", "Block_ref",
+;; "Embed_data"); "Complex" carries a {:protocol :link} map, the others a
+;; string. :label is a sequence of inline nodes, :title may be absent or nil.
+;; The ::inline ref relies on the registry of inline-ast-schema, where this
+;; schema is registered as ::link.
+(def ^:private link-schema
+  [:map
+   [:url [:or
+          [:cat [:= "File"] :string]
+          [:cat [:= "Search"] :string]
+          [:cat [:= "Complex"] [:map
+                                [:protocol :string]
+                                [:link :string]]]
+          [:cat [:= "Page_ref"] :string]
+          [:cat [:= "Block_ref"] :string]
+          [:cat [:= "Embed_data"] :string]]]
+   [:label [:sequential [:ref ::inline]]]
+   (field-optional-and-maybe-nil :title :string)
+   [:full_text :string]
+   [:metadata :string]])
+
+;; Latex fragment payload: ["Inline" <string>] or ["Displayed" <string>].
+;; Shared by the "Latex_Fragment" entries of both the inline and the block
+;; schema below.
+(def latex-fragment-schema
+  [:or
+   [:tuple [:= "Inline"] :string]
+   [:tuple [:= "Displayed"] :string]])
+
+;; Schema for a single inline AST node. Every node is a tagged tuple whose
+;; first element is the node type string ("Emphasis", "Plain", "Link", ...)
+;; followed by that type's payload. The local registry supplies ::timestamp,
+;; ::time-range and ::link so that the private schemas above can resolve their
+;; refs, and ::inline so that container nodes can nest inline nodes.
+(def inline-ast-schema
+  [:schema {:registry {::timestamp timestamp-schema
+                       ::time-range time-range-schema
+                       ::link link-schema
+                       ::inline
+                       [:or
+                        [:tuple [:= "Emphasis"]
+                         [:tuple
+                          [:tuple [:enum "Italic" "Bold" "Underline" "Strike_through" "Highlight"]]
+                          [:sequential [:ref ::inline]]]]
+
+                        [:tuple [:= "Break_Line"]]
+                        [:tuple [:= "Hard_Break_Line"]]
+                        [:tuple [:= "Verbatim"] :string]
+                        [:tuple [:= "Code"] :string]
+                        [:tuple [:= "Tag"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Spaces"] :string]
+                        [:tuple [:= "Plain"] :string]
+                        [:tuple [:= "Link"] [:ref ::link]]
+                        [:tuple [:= "Nested_link"] nested-link-schema]
+                        [:tuple [:= "Target"] :string]
+                        [:tuple [:= "Subscript"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Superscript"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Footnote_Reference"] [:map
+                                                           [:id :int]
+                                                           [:name :string]
+                                                           (field-optional-and-maybe-nil
+                                                            :definition  [:sequential [:ref ::inline]])]]
+                        [:tuple [:= "Cookie"] [:or
+                                               [:tuple [:= "Percent"] :int]
+                                               [:catn [:label [:= "Absolute"]] [:current :int] [:total :int]]]]
+                        [:tuple [:= "Latex_Fragment"] latex-fragment-schema]
+                        [:tuple [:= "Macro"] [:map
+                                              [:name :string]
+                                              [:arguments [:sequential :string]]]]
+                        [:tuple [:= "Entity"] [:map
+                                               [:name :string]
+                                               [:latex :string]
+                                               [:latex_mathp :boolean]
+                                               [:html :string]
+                                               [:ascii :string]
+                                               [:unicode :string]]]
+                        [:tuple [:= "Timestamp"] [:or
+                                                  [:tuple [:= "Scheduled"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Deadline"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Date"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Closed"] [:ref ::timestamp]]
+                                                  [:tuple [:= "Clock"] [:or
+                                                                        [:tuple [:= "Started"] [:ref ::timestamp]]
+                                                                        [:tuple [:= "Stopped"] [:ref ::time-range]]]]
+                                                  [:tuple [:= "Range"] [:ref ::time-range]]]]
+                        [:tuple [:= "Radio_Target"] :string]
+                        [:tuple [:= "Export_Snippet"] :string :string]
+                        [:tuple [:= "Inline_Source_Block"] [:map
+                                                            [:language :string]
+                                                            [:options :string]
+                                                            [:code :string]]]
+                        [:tuple [:= "Email"] [:map
+                                              [:local_part :string]
+                                              [:domain :string]]]
+                        [:tuple [:= "Inline_Hiccup"] :string]
+                        [:tuple [:= "Inline_Html"] :string]]}}
+   ::inline])
+
+;; One item of a "List" block: its block :content, nested :items, inline
+;; :name, plus :indent and :ordered; :number and :checkbox may be absent or
+;; nil. The ::block, ::list-item and ::inline refs rely on the registry of
+;; block-ast-schema, where this schema is registered as ::list-item.
+(def ^:private list-item-schema
+  [:map
+   [:content [:sequential [:ref ::block]]]
+   [:items [:sequential [:ref ::list-item]]]
+   (field-optional-and-maybe-nil
+    :number :int)
+   [:name [:sequential [:ref ::inline]]]
+   (field-optional-and-maybe-nil
+    :checkbox :boolean)
+   [:indent :int]
+   [:ordered :boolean]])
+
+;; Payload of a "Heading" block: inline :title, string :tags, integer :level,
+;; string :anchor and a :meta map are required; :marker, :numbering,
+;; :priority and :size may be absent or nil. The ::inline ref relies on the
+;; registry of block-ast-schema, where this schema is embedded.
+(def ^:private heading-schema
+  [:map
+   [:title [:sequential [:ref ::inline]]]
+   [:tags [:sequential :string]]
+   (field-optional-and-maybe-nil
+    :marker :string)
+   [:level :int]
+   (field-optional-and-maybe-nil
+    :numbering [:sequential :int])
+   (field-optional-and-maybe-nil
+    :priority :string)
+   [:anchor :string]
+   [:meta :map]
+   (field-optional-and-maybe-nil
+    :size :int)])
+
+;; Schema for a single block AST node. Every node is a tagged sequence whose
+;; first element is the block type string ("Paragraph", "Heading", "List",
+;; ...) followed by that type's payload. The local registry supplies ::inline
+;; and ::list-item for the schemas defined above, and ::block so that
+;; container blocks ("Quote", "Custom", list item content) can nest blocks.
+(def block-ast-schema
+  [:schema {:registry {::inline inline-ast-schema
+                       ::list-item list-item-schema
+                       ::block
+                       [:or
+                        [:tuple [:= "Paragraph"] [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Paragraph_Sep"] :int]
+                        [:tuple [:= "Heading"] heading-schema]
+                        [:tuple [:= "List"] [:sequential [:ref ::list-item]]]
+                        [:tuple [:= "Directive"] :string :string]
+                        [:tuple [:= "Results"]]
+                        [:tuple [:= "Example"] [:sequential :string]]
+                        [:tuple [:= "Src"] [:map
+                                            [:lines [:sequential :string]]
+                                            (field-optional-and-maybe-nil
+                                             :language :string)
+                                            (field-optional-and-maybe-nil
+                                             :options [:sequential :string])
+                                            [:pos_meta pos-schema]]]
+                        [:tuple [:= "Quote"] [:sequential [:ref ::block]]]
+                        [:catn
+                         [:label [:= "Export"]]
+                         [:type :string]
+                         [:options [:maybe [:sequential :string]]]
+                         [:content :string]]
+                        [:tuple [:= "CommentBlock"] [:sequential :string]]
+                        [:catn
+                         [:label [:= "Custom"]]
+                         [:type :string]
+                         [:options [:maybe :string]]
+                         [:result [:sequential [:ref ::block]]]
+                         [:content :string]]
+                        [:tuple [:= "Latex_Fragment"] latex-fragment-schema]
+                        [:catn
+                         [:label [:= "Latex_Environment"]]
+                         [:name :string]
+                         [:options [:maybe :string]]
+                         [:content :string]]
+                        [:tuple [:= "Displayed_Math"] :string]
+                        [:tuple [:= "Drawer"] :string [:sequential :string]]
+                        [:tuple [:= "Property_Drawer"]
+                         [:sequential
+                          [:catn [:k :string] [:v :string] [:other-info [:sequential [:ref ::inline]]]]]]
+                        [:tuple [:= "Footnote_Definition"] :string [:sequential [:ref ::inline]]]
+                        [:tuple [:= "Horizontal_Rule"]]
+                        [:tuple [:= "Table"]
+                         [:map
+                          (field-optional-and-maybe-nil
+                           :header [:sequential [:sequential [:ref ::inline]]])
+                          [:groups [:sequential [:sequential [:sequential [:sequential [:ref ::inline]]]]]]
+                          [:col_groups [:sequential :int]]]]
+                        [:tuple [:= "Comment"] :string]
+                        [:tuple [:= "Raw_Html"] :string]
+                        [:tuple [:= "Hiccup"] :string]
+
+                        ;; this block type is not from mldoc,
+                        ;; but from `logseq.graph-parser.mldoc/collect-page-properties`
+                        [:tuple [:= "Properties"] [:sequential :any]]]}}
+   ::block])
+
+;; Collection of [block-ast pos] pairs, where pos matches pos-schema or is
+;; nil. This is the schema used for the return value of
+;; `logseq.graph-parser.mldoc/->edn` and by the `validate-ast` task.
+(def block-ast-with-pos-coll-schema
+  [:sequential [:cat block-ast-schema [:maybe pos-schema]]])
+
+;; Collection of bare block AST nodes, without the accompanying position.
+(def block-ast-coll-schema
+  [:sequential block-ast-schema])

+ 27 - 0
scripts/src/logseq/tasks/malli.clj

@@ -5,6 +5,8 @@
             [frontend.schema.handler.plugin-config :as plugin-config-schema]
             [frontend.schema.handler.plugin-config :as plugin-config-schema]
             [frontend.schema.handler.global-config :as global-config-schema]
             [frontend.schema.handler.global-config :as global-config-schema]
             [frontend.schema.handler.repo-config :as repo-config-schema]
             [frontend.schema.handler.repo-config :as repo-config-schema]
+            [logseq.graph-parser.schema.mldoc :as mldoc-schema]
+            [babashka.fs :as fs]
             [clojure.pprint :as pprint]
             [clojure.pprint :as pprint]
             [clojure.edn :as edn]))
             [clojure.edn :as edn]))
 
 
@@ -43,3 +45,28 @@
   "Validate a global config.edn"
   "Validate a global config.edn"
   [file]
   [file]
   (validate-file-with-schema file repo-config-schema/Config-edn))
   (validate-file-with-schema file repo-config-schema/Config-edn))
+
+(defn validate-ast
+  "Check mldoc AST data against the block-ast-with-pos schema and print the
+  outcome. `file-or-edn` is either the path of an existing file holding EDN or
+  the EDN text itself. A sequential whose first item has a truthy :ast is
+  treated as a batch of {:file ... :ast ...} entries; anything else is
+  validated as a single AST."
+  [file-or-edn]
+  (let [schema mldoc-schema/block-ast-with-pos-coll-schema
+        edn-text (if (fs/exists? file-or-edn)
+                   (slurp file-or-edn)
+                   file-or-edn)
+        data (edn/read-string edn-text)
+        batch? (and (sequential? data) (:ast (first data)))
+        print-errors (fn [errors]
+                       (println "Found errors:")
+                       (pprint/pprint errors))]
+    (if-not batch?
+      (if-let [errors (m/explain schema data)]
+        (print-errors errors)
+        (println "Valid!"))
+      ;; Batch format [{:file "" :ast []} ...], as produced by
+      ;; https://github.com/logseq/nbb-logseq/tree/main/examples/from-js#graph_astmjs
+      (do
+        (println "Validating" (count data) "files...")
+        (if-let [failures (seq (for [entry data
+                                     :let [errors (m/explain schema (:ast entry))]
+                                     :when errors]
+                                 {:file (:file entry)
+                                  :errors errors}))]
+          (print-errors failures)
+          (println "All files valid!"))))))

+ 5 - 4
src/main/frontend/handler/export/common.cljs

@@ -106,10 +106,11 @@
 (defn- update-level-in-block-ast-coll
 (defn- update-level-in-block-ast-coll
   [block-ast-coll origin-level]
   [block-ast-coll origin-level]
   (mapv
   (mapv
-   (fn [[ast-type ast-content]]
-     (if (= ast-type "Heading")
-       [ast-type (update ast-content :level #(+ (dec %) origin-level))]
-       [ast-type ast-content]))
+   (fn [block-ast]
+     (let [[ast-type ast-content] block-ast]
+       (if (= ast-type "Heading")
+         [ast-type (update ast-content :level #(+ (dec %) origin-level))]
+         block-ast)))
    block-ast-coll))
    block-ast-coll))
 
 
 (defn- plain-indent-inline-ast
 (defn- plain-indent-inline-ast

+ 2 - 1
src/main/frontend/handler/export/text.cljs

@@ -7,6 +7,7 @@
             [frontend.handler.export.common :as common :refer
             [frontend.handler.export.common :as common :refer
              [*state* indent newline* raw-text simple-ast-malli-schema
              [*state* indent newline* raw-text simple-ast-malli-schema
               simple-asts->string space]]
               simple-asts->string space]]
+            [logseq.graph-parser.schema.mldoc :as mldoc-schema]
             [frontend.state :as state]
             [frontend.state :as state]
             [frontend.util :as util :refer [concatv mapcatv removev]]
             [frontend.util :as util :refer [concatv mapcatv removev]]
             [goog.dom :as gdom]
             [goog.dom :as gdom]
@@ -320,7 +321,7 @@
          (indent-with-2-spaces (dec current-level)))))])
          (indent-with-2-spaces (dec current-level)))))])
 
 
 ;; {:malli/schema ...} only works on public vars, so use m/=> here
 ;; {:malli/schema ...} only works on public vars, so use m/=> here
-(m/=> block-ast->simple-ast [:=> [:cat [:sequential :any]] [:sequential simple-ast-malli-schema]])
+(m/=> block-ast->simple-ast [:=> [:cat mldoc-schema/block-ast-schema] [:sequential simple-ast-malli-schema]])
 (defn- block-ast->simple-ast
 (defn- block-ast->simple-ast
   [block]
   [block]
   (let [newline-after-block? (get-in *state* [:export-options :newline-after-block])]
   (let [newline-after-block? (get-in *state* [:export-options :newline-after-block])]