Ver Fonte

feat: semsearch page transaction hook

Junyi Du há 2 anos atrás
pai
commit
5a675bd3b8
2 ficheiros alterados com 82 adições e 35 exclusões
  1. 17 1
      src/main/frontend/search/db.cljs
  2. 65 34
      src/main/frontend/search/semantic.cljs

+ 17 - 1
src/main/frontend/search/db.cljs

@@ -19,6 +19,22 @@
   []
   (state/block-content-max-length (state/get-current-repo)))
 
+(defn inject-original-name
+  "Builds the index string for a page: `original-name` wrapped between the
+  opening and closing field markers, followed by `content`."
+  [original-name content]
+  (let [open-marker  "$pfts_f6ld>$ "
+        close-marker " $<pfts_f6ld$ "]
+    (str open-marker original-name close-marker content)))
+
+(defn extract-original-name-and-content
+  "Splits index content back into the page's original-name and its content.
+
+  Returns `[title content]` when the opening marker `$pfts_f6ld>$ ` is followed
+  later by the closing marker ` $<pfts_f6ld$ `; both markers are stripped.
+  Returns `[nil content]`, with `content` untouched, when the markers are
+  missing, out of order, or `content` is not a string."
+  [content]
+  (let [start-marker "$pfts_f6ld>$ "
+        end-marker   " $<pfts_f6ld$ "
+        start-ind    (when (string? content)
+                       (string/index-of content start-marker))
+        title-start  (when start-ind
+                       (+ start-ind (count start-marker)))
+        ;; Search for the closing marker only after the opening one, so a stray
+        ;; closing marker earlier in the text cannot yield an inverted range.
+        end-ind      (when title-start
+                       (string/index-of content end-marker title-start))]
+    (if end-ind
+      ;; Offsets come from the marker lengths: the closing marker is one
+      ;; character longer than the opening one, so a shared constant would
+      ;; leave a leading space on the content.
+      [(subs content title-start end-ind)
+       (subs content (+ end-ind (count end-marker)))]
+      [nil content])))
+
 (defn block->index
   "Convert a block to the index for searching"
   [{:block/keys [uuid page content] :as block}]
@@ -40,7 +56,7 @@
         {:id   (:db/id page)
          :uuid (str uuid)
          ;; Add page name to the index
-         :content (sanitize (str "$pfts_f6ld>$ " original-name " $<pfts_f6ld$ " content))}))))
+         :content (sanitize (inject-original-name original-name content))}))))
 
 (defn build-blocks-indice
   ;; TODO: Remove repo effects fns further up the call stack. db fns need standardization on taking connection

+ 65 - 34
src/main/frontend/search/semantic.cljs

@@ -4,6 +4,7 @@
             [cljs-bean.core :as bean]
             [promesa.core :as p]
             [frontend.search.protocol :as protocol]
+            [frontend.search.db :as search-db]
             [frontend.ai.vector-store :as vector-store]
             [frontend.ai.text-encoder :as text-encoder]
             [frontend.state :as state]
@@ -34,50 +35,80 @@
     ;; Step 3: deal with blocks-to-add
     ;; {:blocks-to-remove-set #{16634}, :blocks-to-add ({:id 16634, :uuid "647dcfc7-2aba-4015-8b71-cdf73c552761", :page 12, :content "adding me 2"})}
     ;; Handling blocks to add
-    (let [encoder      (state/get-semsearch-encoder)
-          encoder-name (:name encoder)
-          encoder-dim  (get-in encoder [:opts :modelDim])
-          store-conn   (if encoder-dim
-                         (vector-store/create (idstr-template-string repo) encoder-dim)
-                         (throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
-          eid-del->vs (fn [eid]
+    (p/let [encoder      (state/get-semsearch-encoder)
+            encoder-name (:name encoder)
+            encoder-dim  (get-in encoder [:opts :modelDim])
+            store-conn   (if encoder-dim
+                           (vector-store/create (idstr-template-string repo) encoder-dim)
+                           (throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
+            eid-del->vs (fn [eid]
                        ;; Would replace existing promise in queue (if any)
                        ;; If the promise is already in pending state, 
                        ;; there's a race condition that the promise executed
                        ;; before the pending promise is resolved
-                       (let [del->vs (fn [] ;; Promise factory
-                                       (vector-store/rm store-conn (str eid)))]
-                         (.addTask taskQueue (str eid) del->vs)))
-          block-add->vs (fn [block] 
+                          (let [del->vs (fn [] ;; Promise factory
+                                          (vector-store/rm store-conn (str eid)))]
+                            (.addTask taskQueue (str eid) del->vs)))
+            block-add->vs (fn [block]
                        ;; Would replace the task if there is already a task with the same id in the queue
                        ;; Here we use stringified id as key to keep consistency with the logMind type annotation
-                       (let [add->vs (fn []
-                                    (p/let [metadata  {:snippet (gp-util/safe-subs (:content block) 0 20)
-                                                       :page    (:page block)
-                                                       :id      (:id block)
-                                                       :uuid    (:uuid block)}
-                                            embeds    (text-encoder/text-encode (:content block) encoder-name)
-                                            _         (vector-store/rm store-conn (str (:id block)))
-                                            emb-add->vs   (fn [embed]
-                                                            (vector-store/add store-conn embed (str (:id block)) (bean/->js metadata)))]
-                                      (p/all (mapv emb-add->vs embeds))))]
-                         (.addTask taskQueue (str (:id block)) add->vs)))]
+                            (let [add->vs (fn []
+                                            (p/let [metadata  {:snippet (gp-util/safe-subs (:content block) 0 20)
+                                                               :page    (:page block)
+                                                               :id      (:id block)
+                                                               :uuid    (:uuid block)}
+                                                    embeds    (text-encoder/text-encode (:content block) encoder-name)
+                                                    _         (vector-store/rm store-conn (str (:id block)))
+                                                    emb-add->vs   (fn [embed]
+                                                                    (vector-store/add store-conn embed (str (:id block)) (bean/->js metadata)))]
+                                              (p/all (mapv emb-add->vs embeds))))]
+                              (.addTask taskQueue (str (:id block)) add->vs)))]
       ;; Delete first, then add
       (mapv eid-del->vs blocks-to-remove-set)
       (mapv block-add->vs blocks-to-add)))
-  
-  (transact-pages! [_this data] 
-    (prn "semantic: transact-pages!") ;; TODO Junyi
-    (prn data))
+
+  (transact-pages! [_this {:keys [pages-to-remove-set
+                                  pages-to-add]}]
+    ;; Queues vector-store removals and (re-)additions for changed pages. Example payload: {:pages-to-remove-set nil, :pages-to-add #{{:id 2780, :uuid "64cc77a2-af63-47b1-860d-dab30b7607a6", :content "$pfts_f6ld>$ aug 4th, 2023 $<pfts_f6ld$ - good bye!"}}}
+    (p/let [encoder (state/get-semsearch-encoder)
+            encoder-name (:name encoder)
+            encoder-dim  (get-in encoder [:opts :modelDim])
+            store-conn   (if encoder-dim ;; the store is created with the encoder's :modelDim, so it must be present
+                           (vector-store/create (idstr-template-string repo) encoder-dim)
+                           (throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
+            eid-del->vs  (fn [eid]
+                           ;; Queues removal of `eid` from the vector store, keyed by the stringified id.
+                           ;; Adding a task is expected to replace a queued task with the same key (if any).
+                           ;; Known race: if that earlier task is already pending, this one
+                           ;; may execute before the pending one has resolved.
+                           (let [del->vs (fn [] ;; Promise factory
+                                           (vector-store/rm store-conn (str eid)))]
+                             (.addTask taskQueue (str eid) del->vs)))
+            page-add->vs (fn [page]
+                            ;; Queues (re-)indexing of `page`; a queued task with the same id is expected to be replaced.
+                            ;; The stringified id is used as the key to stay consistent with the logMind type annotation.
+                            (let [add->vs (fn []
+                                            (p/let [[title content] (search-db/extract-original-name-and-content (:content page)) ;; title is nil when no markers are found
+                                                    metadata  {:title   (or title "No title page") ;; fallback label for a missing title
+                                                               :id      (:id page)
+                                                               :uuid    (:uuid page)}
+                                                    embeds    (text-encoder/text-encode (str title "\n" content) encoder-name) ;; mapped over below, so treated as a collection of embeddings
+                                                    _         (vector-store/rm store-conn (str (:id page))) ;; remove what is stored under this id before adding again
+                                                    emb-add->vs   (fn [embed]
+                                                                    (vector-store/add store-conn embed (str (:id page)) (bean/->js metadata)))]
+                                              (p/all (mapv emb-add->vs embeds))))]
+                              (.addTask taskQueue (str (:id page)) add->vs)))]
+      (mapv eid-del->vs pages-to-remove-set) ;; removals are queued first
+      (mapv page-add->vs pages-to-add))) ;; then additions; this vector is the body's result
 
  (truncate-blocks! [_this]
-                    (-> repo
-                        (idstr-template-string)
-                        (vector-store/reset))
-                    (.clean taskQueue))
+    (-> repo
+        (idstr-template-string)
+        (vector-store/reset)) ;; i.e. (vector-store/reset (idstr-template-string repo))
+    (.clean taskQueue)) ;; then call .clean on the task queue
 
  (remove-db! [_this]
-              (-> repo
-                  (idstr-template-string)
-                  (vector-store/reset))
-              (.clean taskQueue)))
+    (-> repo
+        (idstr-template-string)
+        (vector-store/reset)) ;; same steps as truncate-blocks!: reset the store for this repo's id string
+    (.clean taskQueue))) ;; then call .clean on the task queue