text.cljs 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. (ns logseq.graph-parser.text
  2. "Miscellaneous text util fns for the parser"
  3. (:require ["path" :as path]
  4. [goog.string :as gstring]
  5. [clojure.string :as string]
  6. [clojure.set :as set]
  7. [logseq.graph-parser.property :as gp-property]
  8. [logseq.graph-parser.mldoc :as gp-mldoc]
  9. [logseq.graph-parser.util :as gp-util]
  10. [logseq.graph-parser.util.page-ref :as page-ref]))
  (defn get-file-basename
    "Returns the file name of `path` without directory or extension, or nil
    when `path` is blank. Every `+` in `path` is replaced by `/` before the
    path is parsed."
    [path]
    (if (string/blank? path)
      nil
      ;; Same as util/node-path.name
      (-> path
          (string/replace "+" "/")
          path/parse
          .-name)))
  (def page-ref-re-0
    "Matches text wrapped in double square brackets; group 1 is the text
    between the outermost brackets."
    #"\[\[(.*)\]\]")

  (def org-page-ref-re
    "Matches an org-style file link with a description part; group 1 is the
    link target, which starts with file:."
    #"\[\[(file:.*)\]\[.+?\]\]")

  (def markdown-page-ref-re
    "Matches a markdown-style link whose target starts with file:; group 1 is
    the link label."
    #"\[(.*)\]\(file:.*\)")
  (defn get-page-name
    "Pulls a page name out of a format-specific page-ref (markdown or org file
    link) or a plain double-bracket page-ref. Intended only for contexts where
    format-specific page-refs occur; for logseq page-refs use
    page-ref/get-page-name.

    Tried in order, each against the whole string:
    - markdown file link: the trimmed label
    - org file link: the file basename with every `.` replaced by `/`
    - double-bracket ref: the text between the brackets

    Returns false when `s` is not a string and nil when nothing matches."
    [s]
    (if-not (string? s)
      false
      (if-let [[_ label] (re-matches markdown-page-ref-re s)]
        (string/trim label)
        (or (when-let [[_ file-path] (re-matches org-page-ref-re s)]
              ;; A nil basename falls through to the bracket pattern below.
              (when-some [basename (get-file-basename file-path)]
                (string/replace basename "." "/")))
            (second (re-matches page-ref-re-0 s))))))
  (defn page-ref-un-brackets!
    "Returns the page name extracted from `s` by get-page-name, or `s` itself
    when get-page-name yields nothing truthy."
    [s]
    (if-let [page-name (get-page-name s)]
      page-name
      s))
  (defn get-nested-page-name
    "Returns the second element of the first match of
    page-ref/page-ref-without-nested-re in `page-name`, or nil when there is
    no match."
    [page-name]
    (some-> (re-find page-ref/page-ref-without-nested-re page-name)
            second))
  (defn- remove-level-space-aux!
    "Strips a leading run of `pattern` characters and the whitespace after it
    from `text`. `pattern` is interpolated into a regex character class.

    - `space?` truthy: one or more whitespace characters must follow the run.
    - `space?` falsy: at most one whitespace character is consumed.
    - `trim-left?` truthy: `text` is left-trimmed before matching.

    Only the first match, anchored at the start, is removed."
    [text pattern space? trim-left?]
    (let [template (if space?
                     "^[%s]+\\s+"
                     "^[%s]+\\s?")
          level-re (re-pattern (gstring/format template pattern))
          source   (cond-> text
                     trim-left? string/triml)]
      (string/replace-first source level-re "")))
  (defn remove-level-spaces
    "Removes the leading block-level marker (built from `block-pattern`) from
    `text`.

    `space?` defaults to false and `trim-left?` to true; both are passed on to
    remove-level-space-aux!.

    Returns nil when `format` is falsy, an empty string when `text` is blank,
    and `text` unchanged when the format is named markdown and `text` starts
    with three dashes."
    ([text format block-pattern]
     (remove-level-spaces text format block-pattern false))
    ([text format block-pattern space?]
     (remove-level-spaces text format block-pattern space? true))
    ([text format block-pattern space? trim-left?]
     (when format
       (if (string/blank? text)
         ""
         (let [markdown-dashes? (and (= "markdown" (name format))
                                     (string/starts-with? text "---"))]
           (if markdown-dashes?
             text
             (remove-level-space-aux! text block-pattern space? trim-left?)))))))
  (defn namespace-page?
    "True when `p` is a string that contains a slash, does not start with
    `../` or `./`, and is not a url according to gp-util/url?."
    [p]
    (and (string? p)
         (string/includes? p "/")
         (not (or (string/starts-with? p "../")
                  (string/starts-with? p "./")
                  (gp-util/url? p)))))
  (defn parse-non-string-property-value
    "Parses `v` into a boolean or an integer when it looks like one. The
    literal strings true and false become booleans, a string made only of
    digits goes through parse-long, and anything else yields nil."
    [v]
    (case v
      "true"  true
      "false" false
      (when (re-find #"^\d+$" v)
        (parse-long v))))
  (defn- get-ref-from-ast
    "Returns the reference carried by one `[typ data]` ast node, or nil.

    - Link: the second element of `(:url data)` when its first element is
      Page_ref or Search.
    - Nested_link: page-ref/get-page-name applied to `(:content data)`.
    - Tag: the text of the first child when that child is a Plain node,
      otherwise the result of recursing into the first child.
    - Any other type: nil."
    [[typ data]]
    (condp = typ
      "Link"
      (let [url (:url data)]
        (when (contains? #{"Page_ref" "Search"} (first url))
          (second url)))

      "Nested_link"
      (page-ref/get-page-name (:content data))

      "Tag"
      (let [head (first data)]
        (if (= "Plain" (first head))
          (second head)
          (get-ref-from-ast head)))

      nil))
  (defn extract-refs-from-mldoc-ast
    "Returns the set of trimmed references found in the ast nodes `v`. Nodes
    for which gp-mldoc/ast-link? is truthy are dropped first; the rest go
    through get-ref-from-ast and nil results are discarded."
    [v]
    (->> v
         (remove gp-mldoc/ast-link?)
         (keep get-ref-from-ast)
         (map string/trim)
         set))
  (defn- sep-by-comma
    "Splits `s` on commas and returns the set of trimmed, non-blank pieces.

    `s` must be a string; otherwise the precondition fails with an assertion
    error (when assertions are enabled)."
    [s]
    ;; :pre takes a vector of expressions. The previous bare list form was read
    ;; as two separate conditions, `string?` and `s`, so it only checked that
    ;; `s` was truthy.
    {:pre [(string? s)]}
    ;; NOTE(review): the character class lists the same comma twice; possibly a
    ;; second, non-ASCII comma variant was intended here -- confirm.
    (->> (string/split s #"[\,,]{1}")
         (map string/trim)
         (remove string/blank?)
         (set)))
  (defn separated-by-commas?
    "True when property key `k` (turned into a keyword if it is not one
    already) is in the union of
    gp-property/editable-linkable-built-in-properties and the
    :property/separated-by-commas entry of `config-state`."
    [config-state k]
    (let [prop-key    (if (keyword? k) k (keyword k))
          user-props  (set (get config-state :property/separated-by-commas))
          comma-props (set/union gp-property/editable-linkable-built-in-properties
                                 user-props)]
      (contains? comma-props prop-key)))
  (defn- extract-refs-by-commas
    "Parses `v` with mldoc using the default config for `format`, takes the
    Plain text nodes from the second element of the first parsed block, and
    returns the set of their comma-separated pieces."
    [v format]
    (let [blocks       (map first (gp-mldoc/->edn v (gp-mldoc/default-config format)))
          inline-nodes (second (first blocks))
          plain-texts  (for [node inline-nodes
                             :when (and (vector? node)
                                        (= "Plain" (first node)))]
                         (second node))]
      (into #{} (mapcat sep-by-comma) plain-texts)))
  (defn- parse-property-refs
    "Returns the set of refs for property `k` with value `v`. Refs always come
    from `mldoc-references-ast`. When `k` is a comma-separated property, the
    comma-separated pieces of `v` are added, parsed with the config's :format
    (default :markdown)."
    [k v mldoc-references-ast config-state]
    (let [ast-refs (extract-refs-from-mldoc-ast mldoc-references-ast)]
      (cond-> ast-refs
        (separated-by-commas? config-state k)
        (set/union (extract-refs-by-commas v (get config-state :format :markdown))))))
  (defn parse-property
    "Parses the value `v` of property `k`, honouring built-in properties and
    user config. `v` is stringified and trimmed first; the rules are then
    tried in order:

    1. the name of `k` is among the unparsed built-in property names or the
       config's :ignored-page-references-keywords -> the trimmed string
    2. the trimmed string is blank -> nil
    3. the trimmed string is wrapped by quotes -> the trimmed string
    4. otherwise -> the set of refs when there are any, else the boolean or
       integer it parses to, else the trimmed string"
    [k v mldoc-references-ast config-state]
    (let [v' (string/trim (str v))
          unparsed-keys (set/union
                         (into #{} (map name) (gp-property/unparsed-built-in-properties))
                         (get config-state :ignored-page-references-keywords))]
      (cond
        (contains? unparsed-keys (name k))
        v'

        (string/blank? v')
        nil

        (gp-util/wrapped-by-quotes? v')
        v'

        ;; parse property value as needed
        :else
        (if-let [refs (not-empty (parse-property-refs k v' mldoc-references-ast config-state))]
          refs
          ;; A parsed `false` is a valid result, so test for nil explicitly.
          (let [parsed (parse-non-string-property-value v')]
            (if (nil? parsed)
              v'
              parsed))))))