Browse Source

feat: semsearch vector storage handler creating

Junyi Du 2 years ago
parent
commit
28fae455bd

+ 1 - 0
libs/CHANGELOG.md

@@ -61,6 +61,7 @@ All notable changes to this project will be documented in this file.
   `App.registerSearchService<T extends IPluginSearchServiceHooks>(s: T): void`
 - Support hooks for text encoder service. _#Alpha stage_
   `App.registerTextEncoderService<T extends IPluginTextEncoderServiceHooks>(s: T): void`
+  Should have an option record `modelDim`, which receives an integer representing the embedding's dimension
 - Support `focus` option for `App.insertBlock`. Credit
   to [[[tennox](https://github.com/tennox)]] [#PR](https://github.com/logseq/logseq/commit/4217057a44de65e5c64be37857af2fb4e9534b24)
 

+ 1 - 0
src/main/frontend/ai/text_encoder.cljs

@@ -30,6 +30,7 @@
   [text service]
   (call-service! service "textEncoder:textEncode" {:text text} true))
 
+;; TODO: support selecting text encoder
 (defn text-encode
   "Return a promise of the encoded text
    Or return nil if no matching text encoder available"

+ 15 - 39
src/main/frontend/ai/vector_store.cljs

@@ -21,72 +21,48 @@
   [id-str dim]
   ;; Check if the store already exists
   (if (contains? @*stores id-str)
-    (p/rejected (js/Error. (str "Vector store " id-str " already exists")))
+    (@*stores id-str)
     (let [store (VectorStorage. id-str dim)]
       (swap! *stores assoc id-str store)
-      (p/let [_ (.initialize store)]
-        (.load store)))))
-
-(defn try-create
-  "Get a vector store handler in the app runtime state
-   It the store is persisted, it will be loaded from the disk
-   If the handler is already created, return a rejected promise.
-   - id-str: identifier for the vector store (should be unique!)
-   - dim: dimension of the vector
-   
-   Returns a promise of the vector store prepared (loaded, and the num of vecs that loaded)
-   or nil if the store already exists"
-  [id-str dim]
-  ;; Check if the store already exists
-  (if (contains? @*stores id-str)
-    nil
-    (let [store (VectorStorage. id-str dim)]
-      (swap! *stores assoc id-str store)
-      (p/let [_ (.initialize store)]
-        (.load store)))))
+      (p/let [_ (.initialize store)
+              _ (.load store)]
+        store))))
 
 (defn add
   "Add a record to the vector store
-   - id-str: identifier for the vector store
+   - store: store handler (conn)
    - embed: the vector to be added
    - key: identifier for the record
    - data: attached metadata for the record (notice: IPC required, so don't send big objects)
    
    Returns a promise of the vector store addition
    or throw an error if the store doesn't exist"
-    [id-str embed key data]
-    (let [store (@*stores id-str)]
-      (when-not store
-        (throw (js/Error. (str "Vector store " id-str " doesn't exist"))))
-      (.add store embed key data)))
+  ([store embed key]
+   (.add store embed key))
+  ([store embed key data]
+   (.add store embed key data)))
 
 (defn rm
   "Remove a record from the vector store
-   - id-str: identifier for the vector store
+   - store: store handler (conn)
    - key: identifier for the record
    
    Returns a promise of the vector store removal
    or throw an error if the store doesn't exist"
-    [id-str key]
-    (let [store (@*stores id-str)]
-      (when-not store
-        (throw (js/Error. (str "Vector store " id-str " doesn't exist"))))
-      (.remove store key)))
+    [store key]
+    (.remove store key))
 
 (defn search
   "Search for records in the vector store
-   - id-str: identifier for the vector store
+   - store: store handler (conn)
    - embed: the vector to be searched
    - return-k: number of records to be returned
    
    Returns a promise of the vector store search
    which contains a list of records
    or throw an error if the store doesn't exist"
-    [id-str embed return-k]
-    (let [store (@*stores id-str)]
-      (when-not store
-        (throw (js/Error. (str "Vector store " id-str " doesn't exist"))))
-      (.search store embed return-k)))
+    [store embed return-k]
+    (.search store embed return-k))
 
 (defn reset
   "Remove all records from the vector store

+ 33 - 10
src/main/frontend/search/semantic.cljs

@@ -2,11 +2,12 @@
   "Browser implementation of search protocol"
   (:require [frontend.search.protocol :as protocol]
             [frontend.ai.vector-store :as vector-store]
-            ;; [frontend.ai.text-encoder :as text-encoder]
-            ;; [promesa.core :as p]
-            ))
+            [frontend.ai.text-encoder :as text-encoder]
+            [promesa.core :as p]
+            
+            [frontend.state :as state]))
 
-(defn template-comment-string
+(defn idstr-template-string
   "Accepts repo url and returns a string for the vector store comment"
   [url]
   (str "logseq-semsearch-vs-" url))
@@ -27,21 +28,43 @@
   (transact-blocks! [_this {:keys [blocks-to-remove-set
                                    blocks-to-add]
                             :as data}]
-    (prn "sematic: transact-blocks!") ;; TODO Junyi
-    (prn data)
-    (prn blocks-to-remove-set)
-    (prn blocks-to-add))
+    ;; Step 1: encode all sentences
+    ;; Step 2: infer the vector length
+    ;; Step 3: create the vector store (optional)
+    ;; Step 4: add to the vector store
+    ;; {:blocks-to-remove-set #{16634}, :blocks-to-add ({:id 16634, :uuid "647dcfc7-2aba-4015-8b71-cdf73c552761", :page 12, :content "adding me 2"})}
+    ;; Handling blocks to add
+    (let [encoder      (state/get-semsearch-encoder)
+          encoder-name (:name encoder)
+          encoder-dim  (get-in encoder [:opts :modelDim])
+          store-conn   (if encoder-dim
+                         (vector-store/create (idstr-template-string repo) encoder-dim)
+                         (throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
+          block->promise (fn [block]
+                           ;; TODO Junyi: Chunker
+                           (p/let [embed (text-encoder/text-encode (:content block) encoder-name)
+                                   _     (vector-store/add store-conn embed (:uuid block))]))
+          embed-promises (map block->promise blocks-to-add)])
+                    (p/let [uuids (map  blocks-to-add)
+                            store-handler (vector-store/try-create )]
+                      (vector-store/add store-handler embed uuid))
+                    (prn "sematic: transact-blocks!") ;; TODO Junyi
+                    (prn data)
+                    (prn blocks-to-remove-set)
+                    (prn blocks-to-add))
   
   (transact-pages! [_this data]
+                   
+                   (vector-store/create "test" 128)
     (prn "semantic: transact-pages!") ;; TODO Junyi
     (prn data))
 
   (truncate-blocks! [_this]
     (-> repo
-        (template-comment-string)
+        (idstr-template-string)
         (vector-store/reset)))
 
   (remove-db! [_this]
     (-> repo
-        (template-comment-string)
+        (idstr-template-string)
         (vector-store/reset))))