Browse Source

Add mldoc integration test and start mldoc split

Gabriel Horner 3 years ago
parent
commit
657bb09591

+ 2 - 0
.clj-kondo/config.edn

@@ -20,6 +20,8 @@
              frontend.db.query-react query-react
              frontend.util util
              frontend.config config
+             frontend.format.mldoc mldoc
+             logseq.graph-parser.mldoc gp-mldoc
              logseq.graph-parser.util gp-util
              logseq.graph-parser.config gp-config}}}
 

+ 5 - 4
src/main/frontend/components/block.cljs

@@ -53,6 +53,7 @@
             [frontend.util.drawer :as drawer]
             [logseq.graph-parser.config :as gp-config]
             [logseq.graph-parser.util :as gp-util]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [goog.dom :as gdom]
             [goog.object :as gobj]
             [lambdaisland.glogi :as log]
@@ -719,13 +720,13 @@
    (inline-text {} format v))
   ([config format v]
    (when (string? v)
-     (let [inline-list (mldoc/inline->edn v (mldoc/default-config format))]
+     (let [inline-list (mldoc/inline->edn v (gp-mldoc/default-config format))]
        [:div.inline.mr-1 (map-inline config inline-list)]))))
 
 (defn- render-macro
   [config name arguments macro-content format]
   (if macro-content
-    (let [ast (->> (mldoc/->edn macro-content (mldoc/default-config format))
+    (let [ast (->> (mldoc/->edn macro-content (gp-mldoc/default-config format))
                    (map first))
           paragraph? (and (= 1 (count ast))
                           (= "Paragraph" (ffirst ast)))]
@@ -2707,11 +2708,11 @@
 ;;     (cond
 ;;       (= lang "quote")
 ;;       (let [content (string/trim (string/join "\n" lines))]
-;;         ["Quote" (first (mldoc/->edn content (mldoc/default-config :markdown)))])
+;;         ["Quote" (first (mldoc/->edn content (gp-mldoc/default-config :markdown)))])
 
 ;;       (contains? #{"query" "note" "tip" "important" "caution" "warning" "pinned"} lang)
 ;;       (let [content (string/trim (string/join "\n" lines))]
-;;         ["Custom" lang nil (first (mldoc/->edn content (mldoc/default-config :markdown))) content])
+;;         ["Custom" lang nil (first (mldoc/->edn content (gp-mldoc/default-config :markdown))) content])
 
 ;;       :else
 ;;       ["Src" options])))

+ 4 - 3
src/main/frontend/format.cljs

@@ -3,6 +3,7 @@
             [frontend.format.adoc :refer [->AdocMode]]
             [frontend.format.protocol :as protocol]
             [frontend.text :as text]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [clojure.string :as string]))
 
 (set! mldoc/parse-property text/parse-property)
@@ -37,9 +38,9 @@
 ;; html
 (defn get-default-config
   ([format]
-   (mldoc/default-config format))
+   (gp-mldoc/default-config format))
   ([format options]
-   (mldoc/default-config format options)))
+   (gp-mldoc/default-config format options)))
 
 (defn to-html
   ([content format]
@@ -49,7 +50,7 @@
      (if (string/blank? content)
        ""
        (if-let [record (get-format-record format)]
-         (protocol/toHtml record content config mldoc/default-references)
+         (protocol/toHtml record content config gp-mldoc/default-references)
          content)))))
 
 (defn to-edn

+ 2 - 1
src/main/frontend/format/block.cljs

@@ -13,6 +13,7 @@
             [frontend.util.property :as property]
             [logseq.graph-parser.util :as gp-util]
             [logseq.graph-parser.config :as gp-config]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [lambdaisland.glogi :as log]
             [medley.core :as medley]
             [frontend.format.mldoc :as mldoc]))
@@ -702,7 +703,7 @@
                        (str (config/get-block-pattern format) " " (string/triml content)))]
        (if-let [result (state/get-block-ast block-uuid content)]
          result
-         (let [ast (->> (format/to-edn content format (mldoc/default-config format))
+         (let [ast (->> (format/to-edn content format (gp-mldoc/default-config format))
                         (map first))
                title (when (heading-block? (first ast))
                        (:title (second (first ast))))

+ 9 - 62
src/main/frontend/format/mldoc.cljs

@@ -1,6 +1,5 @@
 (ns frontend.format.mldoc
-  (:require [cljs-bean.core :as bean]
-            [clojure.string :as string]
+  (:require [clojure.string :as string]
             [frontend.format.protocol :as protocol]
             [frontend.utf8 :as utf8]
             [goog.object :as gobj]
@@ -8,61 +7,15 @@
             [medley.core :as medley]
             ["mldoc" :as mldoc :refer [Mldoc]]
             [linked.core :as linked]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [logseq.graph-parser.util :as gp-util]
             [logseq.graph-parser.config :as gp-config]))
 
-(defonce parseJson (gobj/get Mldoc "parseJson"))
-(defonce parseInlineJson (gobj/get Mldoc "parseInlineJson"))
-(defonce parseOPML (gobj/get Mldoc "parseOPML"))
-(defonce export (gobj/get Mldoc "export"))
 (defonce anchorLink (gobj/get Mldoc "anchorLink"))
+(defonce parseOPML (gobj/get Mldoc "parseOPML"))
 (defonce parseAndExportMarkdown (gobj/get Mldoc "parseAndExportMarkdown"))
 (defonce parseAndExportOPML (gobj/get Mldoc "parseAndExportOPML"))
-(defonce astExportMarkdown (gobj/get Mldoc "astExportMarkdown"))
-
-(defn convert-export-md-remove-options [opts]
-  (->>
-   (mapv (fn [opt]
-             (case opt
-               :page-ref ["Page_ref"]
-               :emphasis ["Emphasis"]
-               []))
-         opts)
-   (remove empty?)))
-
-
-(defn default-config
-  ([format]
-   (default-config format {:export-heading-to-list? false}))
-  ([format {:keys [export-heading-to-list? export-keep-properties? export-md-indent-style export-md-remove-options parse_outline_only?]}]
-   (let [format (string/capitalize (name (or format :markdown)))]
-     (->> {:toc false
-           :parse_outline_only (or parse_outline_only? false)
-           :heading_number false
-           :keep_line_break true
-           :format format
-           :heading_to_list (or export-heading-to-list? false)
-           :exporting_keep_properties export-keep-properties?
-           :export_md_indent_style export-md-indent-style
-           :export_md_remove_options
-           (convert-export-md-remove-options export-md-remove-options)}
-          (filter #(not(nil? (second %))))
-          (into {})
-          (bean/->js)
-          (js/JSON.stringify)))))
-
-(def default-references
-  (js/JSON.stringify
-   (clj->js {:embed_blocks []
-             :embed_pages []})))
-
-(defn parse-json
-  [content config]
-  (parseJson content config))
-
-(defn inline-parse-json
-  [text config]
-  (parseInlineJson text config))
+(defonce export (gobj/get Mldoc "export"))
 
 (defn parse-opml
   [content]
@@ -72,20 +25,14 @@
   [content config references]
   (parseAndExportMarkdown content
                           config
-                          (or references default-references)))
+                          (or references gp-mldoc/default-references)))
 
 (defn parse-export-opml
   [content config title references]
   (parseAndExportOPML content
                       config
                       title
-                      (or references default-references)))
-
-(defn ast-export-markdown
-  [ast config references]
-  (astExportMarkdown ast
-                     config
-                     (or references default-references)))
+                      (or references gp-mldoc/default-references)))
 
 (defn remove-indentation-spaces
   [s level remove-first-line?]
@@ -201,7 +148,7 @@
       (if (string/blank? content)
         []
         (-> content
-            (parse-json config)
+            (gp-mldoc/parse-json config)
             (gp-util/json->clj)
             (update-src-full-content content)
             (collect-page-properties parse-property)))
@@ -227,7 +174,7 @@
     (if (string/blank? text)
       {}
       (-> text
-          (inline-parse-json config)
+          (gp-mldoc/inline-parse-json config)
           (gp-util/json->clj)))
     (catch js/Error _e
       [])))
@@ -263,7 +210,7 @@
 (defn link?
   [format link]
   (when (string? link)
-    (let [[type link] (first (inline->edn link (default-config format)))
+    (let [[type link] (first (inline->edn link (gp-mldoc/default-config format)))
           [ref-type ref-value] (:url link)]
       (and (= "Link" type)
            (or

+ 5 - 4
src/main/frontend/handler/editor.cljs

@@ -53,6 +53,7 @@
             [promesa.core :as p]
             [frontend.util.keycode :as keycode]
             [logseq.graph-parser.util :as gp-util]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             ["path" :as path]))
 
 ;; FIXME: should support multiple images concurrently uploading
@@ -356,7 +357,7 @@
         content (drawer/with-logbook block content)
         content (with-timetracking block content)
         first-block? (= left page)
-        ast (mldoc/->edn (string/trim content) (mldoc/default-config format))
+        ast (mldoc/->edn (string/trim content) (gp-mldoc/default-config format))
         first-elem-type (first (ffirst ast))
         first-elem-meta (second (ffirst ast))
         properties? (contains? #{"Property_Drawer" "Properties"} first-elem-type)
@@ -1941,7 +1942,7 @@
                     props (into [] (:properties block))
                     content* (str (if (= :markdown format) "- " "* ")
                                   (property/insert-properties format content props))
-                    ast (mldoc/->edn content* (mldoc/default-config format))
+                    ast (mldoc/->edn content* (gp-mldoc/default-config format))
                     blocks (block/extract-blocks ast content* true format)
                     fst-block (first blocks)]
                 (assert fst-block "fst-block shouldn't be nil")
@@ -2850,7 +2851,7 @@
   (when-let [editing-block (state/get-edit-block)]
     (let [page-id (:db/id (:block/page editing-block))
           blocks (block/extract-blocks
-                  (mldoc/->edn text (mldoc/default-config format)) text true format)
+                  (mldoc/->edn text (gp-mldoc/default-config format)) text true format)
           blocks' (block/with-parent-and-left page-id blocks)]
       (paste-blocks blocks' {}))))
 
@@ -3168,7 +3169,7 @@
   [format content semantic?]
   (and (string/includes? content "\n")
        (if semantic?
-         (let [ast (mldoc/->edn content (mldoc/default-config format))
+         (let [ast (mldoc/->edn content (gp-mldoc/default-config format))
                first-elem-type (first (ffirst ast))]
            (mldoc/block-with-title? first-elem-type))
          true)))

+ 3 - 2
src/main/frontend/handler/export.cljs

@@ -16,6 +16,7 @@
             [frontend.state :as state]
             [frontend.util :as util]
             [frontend.format.mldoc :as mldoc]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [goog.dom :as gdom]
             [promesa.core :as p])
   (:import [goog.string StringBuffer]))
@@ -214,7 +215,7 @@
   (let [block (db/entity [:block/uuid (uuid block-uuid)])
         block-content (get-blocks-contents repo (:block/uuid block))
         format (:block/format block)
-        ast (mldoc/->edn block-content (mldoc/default-config format))
+        ast (mldoc/->edn block-content (gp-mldoc/default-config format))
         embed-pages-new  (get-embed-pages-from-ast ast)
         embed-blocks-new  (get-embed-blocks-from-ast ast)
         block-refs-new (get-block-refs-from-ast ast)
@@ -258,7 +259,7 @@
   (let [page-name* (util/page-name-sanity-lc page-name)
         page-content (get-page-content repo page-name*)
         format (:block/format (db/entity [:block/name page-name*]))
-        ast (mldoc/->edn page-content (mldoc/default-config format))
+        ast (mldoc/->edn page-content (gp-mldoc/default-config format))
         embed-pages-new (get-embed-pages-from-ast ast)
         embed-blocks-new (get-embed-blocks-from-ast ast)
         block-refs-new (get-block-refs-from-ast ast)

+ 2 - 1
src/main/frontend/handler/extract.cljs

@@ -12,6 +12,7 @@
             [frontend.text :as text]
             [frontend.util :as util]
             [logseq.graph-parser.util :as gp-util]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [frontend.util.property :as property]
             [lambdaisland.glogi :as log]))
 
@@ -142,7 +143,7 @@
     []
     (let [format (format/get-format file)
           _ (println "Parsing start: " file)
-          ast (mldoc/->edn content (mldoc/default-config format
+          ast (mldoc/->edn content (gp-mldoc/default-config format
                                                          ;; {:parse_outline_only? true}
                                                          ))]
       (println "Parsing finished : " file)

+ 2 - 2
src/main/frontend/handler/plugin.cljs

@@ -3,7 +3,7 @@
             [rum.core :as rum]
             [frontend.util :as util]
             [clojure.walk :as walk]
-            [frontend.format.mldoc :as mldoc]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [frontend.handler.notification :as notifications]
             [camel-snake-kebab.core :as csk]
             [frontend.state :as state]
@@ -403,7 +403,7 @@
                             (string/replace matched link (util/node-path.join url link))
                             matched)))
                       content)]
-        (format/to-html content :markdown (mldoc/default-config :markdown))))
+        (format/to-html content :markdown (gp-mldoc/default-config :markdown))))
     (catch js/Error e
       (log/error :parse-user-md-exception e)
       content)))

+ 1 - 1
src/main/frontend/utf8.cljs

@@ -1,4 +1,4 @@
-(ns frontend.utf8
+(ns ^:nbb-compatible frontend.utf8
   (:require [goog.object :as gobj]))
 
 (defonce encoder

+ 3 - 2
src/main/frontend/util/drawer.cljs

@@ -2,6 +2,7 @@
   (:require [clojure.string :as string]
             [frontend.util :as util]
             [logseq.graph-parser.util :as gp-util]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [frontend.util.property :as property]
             [frontend.format.mldoc :as mldoc]))
 
@@ -23,7 +24,7 @@
 
 (defn get-drawer-ast
   [format content typ]
-  (let [ast (mldoc/->edn content (mldoc/default-config format))
+  (let [ast (mldoc/->edn content (gp-mldoc/default-config format))
         typ-drawer (ffirst (filter (fn [x]
                                      (mldoc/typ-drawer? x typ)) ast))]
     typ-drawer))
@@ -32,7 +33,7 @@
   [format content typ value]
   (when (string? content)
     (try
-      (let [ast (mldoc/->edn content (mldoc/default-config format))
+      (let [ast (mldoc/->edn content (gp-mldoc/default-config format))
             has-properties? (some (fn [x] (mldoc/properties? x)) ast)
             has-typ-drawer? (some (fn [x] (mldoc/typ-drawer? x typ)) ast)
             lines (string/split-lines content)

+ 3 - 2
src/main/frontend/util/property.cljs

@@ -5,6 +5,7 @@
             [frontend.config :as config]
             [medley.core :as medley]
             [logseq.graph-parser.util :as gp-util]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [frontend.format.mldoc :as mldoc]
             [frontend.text :as text]
             [frontend.util.cursor :as cursor]))
@@ -187,7 +188,7 @@
         properties (filter (fn [[k _v]] ((built-in-properties) k)) properties)]
     (if (seq properties)
       (let [lines (string/split-lines content)
-            ast (mldoc/->edn content (mldoc/default-config format))
+            ast (mldoc/->edn content (gp-mldoc/default-config format))
             [title body] (if (mldoc/block-with-title? (first (ffirst ast)))
                            [(first lines) (rest lines)]
                            [nil lines])
@@ -233,7 +234,7 @@
    (insert-property format content key value false))
   ([format content key value front-matter?]
    (when (string? content)
-     (let [ast (mldoc/->edn content (mldoc/default-config format))
+     (let [ast (mldoc/->edn content (gp-mldoc/default-config format))
            title? (mldoc/block-with-title? (ffirst (map first ast)))
            has-properties? (or (and title?
                                     (mldoc/properties? (second ast)))

+ 61 - 0
src/main/logseq/graph_parser/mldoc.cljc

@@ -0,0 +1,61 @@
+(ns ^:nbb-compatible logseq.graph-parser.mldoc
+  (:require #?(:org.babashka/nbb ["mldoc$default" :refer [Mldoc]]
+               :default ["mldoc" :refer [Mldoc]])
+            [goog.object :as gobj]
+            [cljs-bean.core :as bean]
+            [clojure.string :as string]))
+
+(defonce parseJson (gobj/get Mldoc "parseJson")) ; wrapped by parse-json
+(defonce parseInlineJson (gobj/get Mldoc "parseInlineJson")) ; wrapped by inline-parse-json
+(defonce astExportMarkdown (gobj/get Mldoc "astExportMarkdown")) ; wrapped by ast-export-markdown
+
+;; NOTE: Every js/ reference in this .cljc file must sit inside a reader conditional that also has a :default branch, because Clojure has no js/ namespace
+(def default-references
+  #?(:cljs
+     (js/JSON.stringify
+      (clj->js {:embed_blocks []
+                :embed_pages []}))
+     :default {}))
+
+(defn- convert-export-md-remove-options
+  "Maps each export option keyword in opts to a single-element string vector:
+  :page-ref -> [\"Page_ref\"] and :emphasis -> [\"Emphasis\"]. Any other option
+  is dropped. Returns a lazy seq of those vectors."
+  [opts]
+  (let [opt->tag {:page-ref ["Page_ref"]
+                  :emphasis ["Emphasis"]}]
+    ;; keep discards the nil produced by looking up an unknown option
+    (keep opt->tag opts)))
+
+(defn parse-json
+  "Calls mldoc's parseJson with content and config and returns its result."
+  [content config]
+  (parseJson content config))
+
+(defn inline-parse-json
+  "Calls mldoc's parseInlineJson with text and config and returns its result."
+  [text config]
+  (parseInlineJson text config))
+
+(defn ast-export-markdown
+  "Passes ast and config to mldoc's astExportMarkdown. When references is nil
+  or false, default-references is used in its place."
+  [ast config references]
+  (let [refs (or references default-references)]
+    (astExportMarkdown ast config refs)))
+
+(defn default-config
+  "Builds the mldoc config for format, which defaults to :markdown when nil.
+  The one-arity form uses {:export-heading-to-list? false} as its options.
+  Entries whose value is nil are left out. In ClojureScript the result is a
+  JSON string; on other platforms the converted value is returned unchanged."
+  ([format]
+   (default-config format {:export-heading-to-list? false}))
+  ([format {:keys [export-heading-to-list? export-keep-properties? export-md-indent-style export-md-remove-options parse_outline_only?]}]
+   (let [format-name (-> (or format :markdown) name string/capitalize)
+         remove-options (convert-export-md-remove-options export-md-remove-options)
+         settings {:toc false
+                   :parse_outline_only (or parse_outline_only? false)
+                   :heading_number false
+                   :keep_line_break true
+                   :format format-name
+                   :heading_to_list (or export-heading-to-list? false)
+                   :exporting_keep_properties export-keep-properties?
+                   :export_md_indent_style export-md-indent-style
+                   :export_md_remove_options remove-options}]
+     (->> settings
+          ;; Drop options the caller did not supply
+          (remove (fn [[_ v]] (nil? v)))
+          (into {})
+          (bean/->js)
+          #?(:cljs js/JSON.stringify :default identity)))))

+ 67 - 1
src/test/frontend/format/mldoc_test.cljs

@@ -1,6 +1,11 @@
 (ns frontend.format.mldoc-test
   (:require [frontend.format.mldoc :as mldoc]
-            [cljs.test :refer [testing deftest are]]))
+            [logseq.graph-parser.mldoc :as gp-mldoc]
+            ["fs" :as fs]
+            ["child_process" :as child-process]
+            [clojure.string :as string]
+            [clojure.edn :as edn]
+            [cljs.test :refer [testing deftest are is]]))
 
 (deftest test-link
   (testing "non-link"
@@ -37,3 +42,64 @@
   (testing "parsing links should be finished"
     (are [x y] (= (mldoc/link? :markdown x) y)
       "[YouTube](https://www.youtube.com/watch?v=-8ym7pyUs9gL) - [Vimeo](https://vimeo.com/677920303) {{youtube https://www.youtube.com/watch?v=-8ym7pyUs9g}}" true)))
+
+;; TODO: Reuse with repo-test fns
+(defn- slurp
+  "Reads file synchronously and returns its contents as a string, like
+  clojure.core/slurp"
+  [file]
+  (-> file fs/readFileSync str))
+
+(defn- sh
+  "Runs cmd, a seq of program followed by its arguments, synchronously with
+  spawnSync. :stdio is \"inherit\" unless opts overrides it. Aims to be similar
+  to babashka.tasks/shell"
+  [cmd opts]
+  (let [program (first cmd)
+        args (clj->js (rest cmd))
+        spawn-opts (clj->js (merge {:stdio "inherit"} opts))]
+    (child-process/spawnSync program args spawn-opts)))
+
+(defn- build-graph-files
+  "Lists the files git tracks under dir, keeps those whose path starts with
+  pages or journals, and returns a vector of maps holding :file/path (prefixed
+  with dir) and :file/content."
+  [dir]
+  (let [git-output (str (.-stdout (sh ["git" "ls-files"]
+                                      {:cwd dir :stdio nil})))
+        graph-file? (fn [path] (re-find #"^(pages|journals)" path))
+        paths (->> (string/split-lines git-output)
+                   (filter graph-file?)
+                   (map (fn [path] (str dir "/" path))))]
+    (mapv (fn [path]
+            (hash-map :file/path path :file/content (slurp path)))
+          paths)))
+
+;; TODO: Add clone docs step (this test reads its graph from src/test/docs)
+(deftest ^:integration test->edn
+  (let [graph-dir "src/test/docs"
+        files (build-graph-files graph-dir)
+        asts-by-file (->> files
+                          (map (fn [{:file/keys [path content]}]
+                                 (let [format (if (string/ends-with? path ".org")
+                                                :org :markdown)]
+                                   [path
+                                    (mldoc/->edn content
+                                                 (gp-mldoc/default-config format))])))
+                          (into {}))]
+    (is (= {"CommentBlock" 1,
+            "Custom" 41,
+            "Displayed_Math" 1,
+            "Drawer" 1,
+            "Example" 20,
+            "Footnote_Definition" 2,
+            "Heading" 3493,
+            "Hiccup" 15,
+            "List" 36,
+            "Paragraph" 411,
+            "Properties" 104,
+            "Property_Drawer" 188,
+            "Quote" 9,
+            "Raw_Html" 12,
+            "Src" 56,
+            "Table" 4}
+           (->> asts-by-file (mapcat val) (map ffirst) frequencies))
+        "AST node type counts")
+
+    ;; Temporary: compares against the snapshot in mldoc-asts.edn, which the commented-out writeFileSync form below can regenerate
+    (is (= (edn/read-string (slurp "mldoc-asts.edn"))
+           asts-by-file)
+        "Matches initial AST")
+    #_(println "Wrote asts for" (count asts-by-file) "files")
+    #_(fs/writeFileSync "mldoc-asts.edn" (pr-str asts-by-file))))

+ 2 - 1
src/test/frontend/parser.cljs

@@ -1,8 +1,9 @@
 (ns frontend.parser
   (:require [cljs.test :refer [is deftest]]
+            [logseq.graph-parser.mldoc :as gp-mldoc]
             [frontend.format.mldoc :as mldoc :refer [->edn]]))
 
-(def md-config (mldoc/default-config :markdown))
+(def md-config (gp-mldoc/default-config :markdown))
 
 (deftest src-test
   (is (=