فهرست منبع

feat(full-text-search): use flexsearch instead of fuzzysort

For full-text search.
Tienson Qin 4 سال پیش
والد
کامیت
42f053d89c

+ 2 - 0
externs.js

@@ -17,6 +17,8 @@ dummy.getRangeAt = function() {};
 dummy.getElementsByClassName = function() {};
 dummy.containsNode = function() {};
 dummy.select = function() {};
+dummy.search = function() {};
+dummy.add = function() {};
 dummy.closest = function () {};
 dummy.setAttribute = function() {};
 dummy.getAttribute = function() {};

+ 1 - 0
package.json

@@ -61,6 +61,7 @@
         "diff": "5.0.0",
         "diff-match-patch": "^1.0.5",
         "electron": "^11.2.0",
+        "flexsearch": "^0.6.32",
         "fs": "^0.0.1-security",
         "fuzzysort": "git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2",
         "gulp-cached": "^1.1.1",

+ 36 - 1
src/main/frontend/components/search.cljs

@@ -31,6 +31,41 @@
   (let [switch (reductions not= true (map pred? coll (rest coll)))]
     (map (partial map first) (partition-by second (map list coll switch)))))
 
+(defn highlight-exact-query
+  "Returns a [:p ...] hiccup form of `content` with matches of `q` in [:mark].
+  The whole query is matched first (case-insensitively); if it is absent, each
+  space-separated word of `q` is marked in order of appearance instead."
+  [content q]
+  (let [lc-content (string/lower-case content)]
+    ;; Branch on the index itself: a query that is missing from `content` must
+    ;; never reach the `subs` arithmetic below with a nil index.
+    (if-let [i (string/index-of lc-content (string/lower-case q))]
+      (let [end (+ i (count q))
+            before (subs content 0 i)
+            after (subs content end)]
+        [:p
+         (when-not (string/blank? before) [:span before])
+         [:mark (subs content i end)]
+         (when-not (string/blank? after) [:span after])])
+      ;; Fallback: mark each query word in turn, scanning left to right.
+      (let [elements (loop [words (string/split q #" ")
+                            content content
+                            result []]
+                       (if (and (seq words) content)
+                         (let [word (first words)
+                               lc-word (string/lower-case word)
+                               lc-content (string/lower-case content)]
+                           (if-let [i (string/index-of lc-content lc-word)]
+                             (recur (rest words)
+                                    (subs content (+ i (count word)))
+                                    (conj result
+                                          [:span (subs content 0 i)]
+                                          [:mark (subs content i (+ i (count word)))]))
+                             ;; Word not found: stop and emit the rest unmarked.
+                             (recur nil content result)))
+                         (conj result [:span content])))]
+        [:p elements]))))
+
 (rum/defc highlight-fuzzy
   [content indexes]
   (let [n (count content)
@@ -182,7 +217,7 @@
                                          (:page/name page))]
                             [:div.flex-1
                              [:div.text-sm.font-medium (str "-> " page)]
-                             (highlight-fuzzy content indexes)])
+                             (highlight-exact-query content search-q)])
 
                           nil))})])))
 

+ 1 - 1
src/main/frontend/handler/editor.cljs

@@ -1767,7 +1767,7 @@
     (remove
      (fn [h]
        (contains? current-and-parents (:block/uuid h)))
-     (search/search q 10))))
+     (search/block-search q 10))))
 
 (defn get-matched-templates
   [q]

+ 8 - 5
src/main/frontend/handler/search.cljs

@@ -3,14 +3,17 @@
             [frontend.state :as state]
             [goog.dom :as gdom]
             [frontend.search :as search]
-            [frontend.handler.notification :as notification-handler]))
+            [frontend.handler.notification :as notification-handler]
+            [promesa.core :as p]))
 
 (defn search
   [q]
-  (swap! state/state assoc :search/result
-         {:pages (search/page-search q)
-          :files (search/file-search q)
-          :blocks (search/search q 10)}))
+  ;; TODO: separate rendering for blocks
+  (p/let [blocks-result (search/block-search q 10)] ; block search is capped at 10 hits and bound through p/let
+    (swap! state/state assoc :search/result ; replaces the whole :search/result map in app state
+           {:pages (search/page-search q) ; NOTE(review): evaluated inside the p/let body, so presumably only after block search resolves - confirm
+            :files (search/file-search q)
+            :blocks blocks-result}))) ; NOTE(review): no visible guard against an older query resolving after a newer one - confirm
 
 (defn clear-search!
   []

+ 51 - 40
src/main/frontend/search.cljs

@@ -12,25 +12,30 @@
             [cljs-bean.core :as bean]
             [goog.object :as gobj]
             ["fuzzysort" :as fuzzy]
-            [medley.core :as medley]))
+            ["flexsearch" :as flexsearch]
+            [medley.core :as medley]
+            [promesa.core :as p]))
 
 (def fuzzy-go (gobj/get fuzzy "go"))
 (defonce prepare (gobj/get fuzzy "prepare"))
 (defonce highlight (gobj/get fuzzy "highlight"))
 
 (defn go
-  [q indice opts]
-  (fuzzy-go q indice opts))
+  [q indice-type indice opts] ; run query q against indice with the backend chosen by indice-type
+  (case indice-type ; no default clause: any value other than :page or :block throws
+    :page
+    (fuzzy-go q indice opts) ; fuzzy-go is the go function read from the fuzzysort module
+
+    :block
+    (.search indice q opts))) ; calls the index object's own .search method; note the argument order differs from fuzzy-go
 
 (defn block->index
   [{:block/keys [uuid content format] :as block}]
-  (when (<= (count content) 1000) ; performance
-    (when-let [result (->> (text/remove-level-spaces content format)
-                           (text/remove-properties!)
-                           (prepare))]
-      (gobj/set result "id" (:db/id block))
-      (gobj/set result "uuid" (str uuid))
-      result)))
+  (when-let [result (->> (text/remove-level-spaces content format) ; build the search document for one block; a nil/false cleaned text makes this return nil
+                         (text/remove-properties!))]
+    {:id (:db/id block) ; taken from the block's :db/id
+     :uuid (str uuid) ; uuid is stored as a string
+     :content result})) ; the cleaned text produced by the two text/ helpers above
 
 (defn make-blocks-indice!
   []
@@ -38,9 +43,17 @@
     (let [blocks (->> (db/get-all-block-contents)
                       (map block->index)
                       (remove nil?)
-                      (bean/->js))]
-      (swap! indices assoc-in [repo :blocks] blocks)
-      blocks)))
+                      (bean/->js))
+          indice (flexsearch.
+                  (clj->js
+                   {:encode "icase"
+                    :tokenize "full"
+                    :doc {:id "id"
+                          :field ["uuid" "content"]}
+                    :async true}))]
+      (p/let [result (.add indice blocks)]
+        (swap! indices assoc-in [repo :blocks] indice))
+      indice)))
 
 (defn make-pages-indice!
   []
@@ -136,10 +149,9 @@
                                      :score (score query (.toLowerCase s))})))))
          (map :data))))
 
-(defn search
-  "Block search"
+(defn block-search
   ([q]
-   (search q 10))
+   (block-search q 10))
   ([q limit]
    (when-let [repo (state/get-current-repo)]
      (when-not (string/blank? q)
@@ -147,21 +159,17 @@
              q (escape-str q)]
          (when-not (string/blank? q)
            (let [indice (or (get-in @indices [repo :blocks])
-                            (make-blocks-indice!))
-                 result (->
-                         (go q indice (clj->js {:limit limit
-                                                :allowTypo false
-                                                :threshold -10000}))
-                         (bean/->clj))]
-             (->>
-              (map
-               (fn [{:keys [target uuid indexes]}]
-                 {:block/uuid uuid
-                  :block/content target
-                  :block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))
-                  :block/indexes indexes}) ; For result highlight
-               result)
-              (remove nil?)))))))))
+                            (make-blocks-indice!))]
+             (p/let [result (go q :block indice (clj->js {:limit limit}))
+                     result (bean/->clj result)]
+               (->>
+                (map
+                 (fn [{:keys [content uuid] :as block}]
+                   {:block/uuid uuid
+                    :block/content content
+                    :block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))})
+                 result)
+                (remove nil?))))))))))
 
 (defn page-search
   ([q]
@@ -173,11 +181,12 @@
        (when-not (string/blank? q)
          (let [indice (or (get-in @indices [repo :pages])
                           (make-pages-indice!))
-               result (->> (go q indice (clj->js {:limit limit
-                                                  :key "name"
-                                                  :allowTypo false
-                                                  :threshold -10000}))
+               result (->> (go q :page indice (clj->js {:limit limit
+                                                        :key "name"
+                                                        :allowTypo false
+                                                        :threshold -10000}))
                            (bean/->clj))]
+           ;; TODO: add indexes for highlights
            (->> (map
                  (fn [{:keys [obj]}]
                    (:name obj))
@@ -250,8 +259,10 @@
                                           (map :e)
                                           (set))]
             (swap! search-db/indices update-in [repo :blocks]
-                   (fn [blocks]
-                     (let [blocks (or blocks (array))
-                           blocks (.filter blocks (fn [block]
-                                                    (not (contains? blocks-to-remove-set (gobj/get block "id")))))]
-                       (.concat blocks (bean/->js blocks-to-add)))))))))))
+                   (fn [indice]
+                     (when indice
+                       (doseq [block-id blocks-to-remove-set]
+                         (.remove indice #js {:id block-id}))
+                       (when (seq blocks-to-add)
+                         (.add indice (bean/->js blocks-to-add))))
+                     indice))))))))

+ 5 - 0
yarn.lock

@@ -2312,6 +2312,11 @@ flatted@^3.1.0:
   resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.0.tgz#a5d06b4a8b01e3a63771daa5cb7a1903e2e57067"
   integrity sha512-tW+UkmtNg/jv9CSofAKvgVcO7c2URjhTdW1ZTkcAritblu8tajiYy7YisnIflEwtKssCtOxpnBRoCB7iap0/TA==
 
+flexsearch@^0.6.32:
+  version "0.6.32"
+  resolved "https://registry.yarnpkg.com/flexsearch/-/flexsearch-0.6.32.tgz#1e20684d317af65baa445cdd9864a5f5b320f510"
+  integrity sha512-EF1BWkhwoeLtbIlDbY/vDSLBen/E5l/f1Vg7iX5CDymQCamcx1vhlc3tIZxIDplPjgi0jhG37c67idFbjg+v+Q==
+
 flush-write-stream@^1.0.2:
   version "1.1.1"
   resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"