extract_test.cljs 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. (ns logseq.graph-parser.extract-test
  2. (:require [cljs.test :refer [deftest is are]]
  3. [logseq.graph-parser.extract :as extract]
  4. [datascript.core :as d]
  5. [logseq.db.frontend.schema :as db-schema]))
  ;; Local copy of frontend.util.fs/multiplatform-reserved-chars, used for the
  ;; reserved-character test cases below. In this file the value is only ever
  ;; passed as a plain string to extract/tri-lb-title-parsing.
  ;; NOTE(review): this is a hand-maintained copy — confirm it still matches
  ;; the definition in frontend.util.fs.
  (def multiplatform-reserved-chars ":\\*\\?\"<>|\\#\\\\")
  ;; File names should still be parsable (i.e. parsing must not crash) when users dump arbitrary files into a graph
  (deftest page-name-parsing-tests
    ;; Each row is an awkward file name. The only requirement is that title
    ;; parsing hands back a string for it.
    (are [raw-name] (string? (#'extract/tri-lb-title-parsing raw-name))
      "___-_-_-_---___----"
      "_____///____---___----"
      "/_/////---/_----"
      "/\\#*%lasdf\\//__--dsll_____----....-._0x2B"
      "/\\#*%l;;&&;&\\//__--dsll_____----....-._0x2B"
      multiplatform-reserved-chars
      "dsa&amp&semi;l dsalfjk jkl"))
  (deftest uri-decoding-tests
    ;; Exercises the private extract/safe-url-decode helper.
    ;; Assertions are written as (= expected actual), the argument order used
    ;; by most other tests in this namespace.

    ;; Contains %, but is not valid percent-encoding: expected back unchanged.
    (is (= "%*-sd%%%saf%=lks"
           (#'extract/safe-url-decode "%*-sd%%%saf%=lks")))
    ;; Valid percent-encoding: %2F, %3A, %5C and %20 are expected to be decoded.
    (is (= "/Downloads/CNN:Is\\All:You Need.pdf"
           (#'extract/safe-url-decode "%2FDownloads%2FCNN%3AIs%5CAll%3AYou%20Need.pdf")))
    ;; No % at all (mixed ASCII / CJK text): expected back unchanged.
    (is (= "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费;dla"
           (#'extract/safe-url-decode "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费;dla"))))
  (deftest page-name-sanitization-backward-tests
    ;; Table of [expected-title file-name] pairs fed through the private
    ;; extract/tri-lb-title-parsing helper.
    (are [expected file-name] (= expected (#'extract/tri-lb-title-parsing file-name))
      "abc.def.ghi.jkl"       "abc.def.ghi.jkl"
      "abc/def/ghi/jkl"       "abc%2Fdef%2Fghi%2Fjkl"
      "abc%/def/ghi/jkl"      "abc%25%2Fdef%2Fghi%2Fjkl"
      "abc%2——ef/ghi/jkl"     "abc%2——ef%2Fghi%2Fjkl"
      "abc&amp;2Fghi/jkl"     "abc&amp;2Fghi%2Fjkl"
      "abc&lt;2Fghi/jkl"      "abc&lt;2Fghi%2Fjkl"
      "abc&percnt;2Fghi/jkl"  "abc&percnt;2Fghi%2Fjkl"
      "abc&semi;&;2Fghi/jkl"  "abc&semi;&;2Fghi%2Fjkl"
      ;; happens when importing some compatible files on *nix / macOS
      multiplatform-reserved-chars multiplatform-reserved-chars))
  (deftest path-utils-tests
    ;; Table of [expected-body path] pairs for the private
    ;; extract/path->file-body helper: plain names, absolute paths and
    ;; file:// URLs, with and without an extension.
    (are [expected path] (= expected (#'extract/path->file-body path))
      "asldk lakls "    "/data/app/asldk lakls .lsad"
      "asldk lakls "    "asldk lakls .lsad"
      "asldk lakls"     "asldk lakls"
      "asldk lakls"     "/data/app/asldk lakls"
      "asldk lakls"     "file://data/app/asldk lakls.as"
      "中文asldk lakls"  "file://中文data/app/中文asldk lakls.as"))
  (defn- extract
    "Calls `extract/extract` on `file` and `content` with test defaults
    (\"-\" block pattern, a fresh empty DataScript db, verbose off).
    Entries in the optional `options` map override the defaults."
    [file content & [options]]
    (let [defaults {:block-pattern "-"
                    :db (d/empty-db db-schema/schema)
                    :verbose false}
          opts (merge defaults options)]
      (extract/extract file content opts)))
  (defn- extract-block-content
    "Extracts `text` as the file \"a.md\" and returns a vector holding the
    :block/content of each resulting block."
    [text]
    (->> (extract "a.md" text)
         :blocks
         (mapv :block/content)))
  (defn- extract-title
    "Extracts `text` as `file` and returns the :title property of the first
    page in the result, or nil when there is none."
    [file text]
    (let [first-page (first (:pages (extract file text)))]
      (get-in first-page [:block/properties :title])))
  ;; Checks that heading lines and list items each become their own block, in
  ;; document order. List items are expected without their "- " marker, while
  ;; heading lines are expected verbatim, "#" markers included.
  ;; NOTE(review): the continuation lines of the multi-line string literals carry
  ;; no leading whitespace in this copy of the file. If the inputs are meant to
  ;; be nested by indentation, confirm that whitespace has not been lost.
  (deftest extract-blocks-for-headings
    ;; Plain list: three items, three blocks.
    (is (= ["a" "b" "c"]
           (extract-block-content
            "- a
- b
- c")))
    ;; A heading line followed by list items.
    (is (= ["## hello" "world" "nice" "nice" "bingo" "world"]
           (extract-block-content "## hello
- world
- nice
- nice
- bingo
- world")))
    ;; Several heading levels followed by list items.
    (is (= ["# a" "## b" "### c" "#### d" "### e" "f" "g" "h" "i" "j"]
           (extract-block-content "# a
## b
### c
#### d
### e
- f
- g
- h
- i
- j"))))
  ;; Checks the :title page property extracted from org files via extract-title.
  ;; NOTE(review): spacing inside the multi-line string literals is reproduced
  ;; exactly as it appears in this copy of the file — confirm it matches the
  ;; intended input.
  (deftest parse-page-title
    ;; Empty file: no title is expected.
    (is (= nil
           (extract-title "foo.org" "")))
    ;; The #+title keyword is expected to be recognized in lower, upper and
    ;; mixed case.
    (is (= "Howdy"
           (extract-title "foo.org" "#+title: Howdy")))
    (is (= "Howdy"
           (extract-title "foo.org" "#+TITLE: Howdy")))
    (is (= "Howdy"
           (extract-title "foo.org" "#+TiTlE: Howdy")))
    ;; A title that follows a :PROPERTIES: drawer is still expected to be
    ;; found, for both keyword casings.
    (is (= "diagram/abcdef"
           (extract-title "foo.org" ":PROPERTIES:
:ID: 72289d9a-eb2f-427b-ad97-b605a4b8c59b
:END:
#+TITLE: diagram/abcdef")))
    (is (= "diagram/abcdef"
           (extract-title "foo.org" ":PROPERTIES:
:ID: 72289d9a-eb2f-427b-ad97-b605a4b8c59b
:END:
#+title: diagram/abcdef"))))
  (deftest extract-blocks-with-property-pages-config
    ;; The same two-property text is extracted twice, toggling
    ;; :property-pages/enabled? in the user config. The set of page names
    ;; referenced by the blocks is compared each time.
    (let [text "foo:: #bar\nbaz:: #bing"
          ref-names (fn [enabled?]
                      (let [options {:user-config {:property-pages/enabled? enabled?}}
                            blocks (:blocks (extract "a.md" text options))]
                        (into #{}
                              (comp (mapcat :block/refs)
                                    (map :block/name))
                              blocks)))]
      ;; Enabled: the property keys are expected among the refs as well.
      (is (= #{"bar" "bing" "foo" "baz"} (ref-names true)))
      ;; Disabled: only the tagged values are expected.
      (is (= #{"bar" "bing"} (ref-names false)))))
  ;; Regression test: four list items must come back as four blocks, in order.
  ;; NOTE(review): presumably named after issue #1902 — confirm. The
  ;; continuation lines of the string literal carry no leading whitespace in
  ;; this copy of the file. If the regression depends on a particular
  ;; indentation of the items, confirm that whitespace has not been lost.
  (deftest test-regression-1902
    (is (= ["line1" "line2" "line3" "line4"]
           (extract-block-content
            "- line1
- line2
- line3
- line4"))))
  (def foo-edn
    "Sample whiteboard export (two blocks, one page) in the shape that is
    serialized with `pr-str` and handed to `extract/extract-whiteboard-edn`."
    {:blocks (list {:block/content "foo content a"
                    :block/format :markdown}
                   {:block/content "foo content b"
                    :block/format :markdown})
     :pages (list {:block/format :markdown
                   :block/original-name "Foo"
                   :block/uuid #uuid "a846e3b4-c41d-4251-80e1-be6978c36d8c"
                   :block/properties {:title "my whiteboard foo"}})})
  (deftest test-extract-whiteboard-edn
    ;; Feeds the serialized foo-edn sample to extract/extract-whiteboard-edn and
    ;; checks the first resulting page and the parent of every block.
    ;; Assertions are written as (= expected actual), the argument order used
    ;; by most other tests in this namespace.
    (let [{:keys [pages blocks]} (extract/extract-whiteboard-edn "/whiteboards/foo.edn" (pr-str foo-edn) {})
          page (first pages)]
      ;; The page records the path of the file it was extracted from.
      (is (= "/whiteboards/foo.edn" (get-in page [:block/file :file/path])))
      ;; :block/name is expected to be the lower-cased form of the original name.
      (is (= "foo" (:block/name page)))
      (is (= #{"whiteboard" "page"} (:block/type page)))
      (is (= "Foo" (:block/original-name page)))
      ;; Every block is expected to point at the page via a :block/uuid lookup ref.
      (is (every? #(= [:block/uuid #uuid "a846e3b4-c41d-4251-80e1-be6978c36d8c"] (:block/parent %)) blocks))))