浏览代码

feat: semsearch vec store metadata; queue for denoising

Junyi Du 2 年之前
父节点
当前提交
4691f94e36
共有 4 个文件被更改，包括 20 次插入和 20 次删除
  1. 2 0
      deps/graph-parser/src/logseq/graph_parser/util.cljs
  2. 1 1
      package.json
  3. 13 15
      src/main/frontend/search/semantic.cljs
  4. 4 4
      yarn.lock

+ 2 - 0
deps/graph-parser/src/logseq/graph_parser/util.cljs

@@ -64,6 +64,8 @@
     (not (re-find #"[# \t\r\n]+" tag-name))))
 
 (defn safe-subs
+  "Cut substring from start to end, but make sure start and end are within the
+   string. No error will be thrown if start or end is out of range."
   ([s start]
    (let [c (count s)]
      (safe-subs s start c)))

+ 1 - 1
package.json

@@ -95,7 +95,7 @@
         "@isomorphic-git/lightning-fs": "^4.6.0",
         "@logseq/capacitor-file-sync": "0.0.35",
         "@logseq/diff-merge": "0.2.2",
-        "@logseq/logmind": "^0.1.0",
+        "@logseq/logmind": "^0.1.2",
         "@logseq/react-tweet-embed": "1.3.1-1",
         "@radix-ui/colors": "^0.1.8",
         "@sentry/react": "^6.18.2",

+ 13 - 15
src/main/frontend/search/semantic.cljs

@@ -1,11 +1,12 @@
 (ns frontend.search.semantic
   "Browser implementation of search protocol"
-  (:require [frontend.search.protocol :as protocol]
+  (:require ["@logseq/logmind" :refer [taskQueue]]
+            [frontend.search.protocol :as protocol]
             [frontend.ai.vector-store :as vector-store]
             [frontend.ai.text-encoder :as text-encoder]
             [promesa.core :as p]
-            
-            [frontend.state :as state]))
+            [frontend.state :as state]
+            [logseq.graph-parser.util :as gp-util]))
 
 (defn idstr-template-string
   "Accepts repo url and returns a string for the vector store comment"
@@ -40,18 +41,15 @@
           store-conn   (if encoder-dim
                          (vector-store/create (idstr-template-string repo) encoder-dim)
                          (throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
-          block->promise (fn [block]
-                           ;; TODO Junyi: Chunker
-                           (p/let [embed (text-encoder/text-encode (:content block) encoder-name)
-                                   _     (vector-store/add store-conn embed (:uuid block))]))
-          embed-promises (map block->promise blocks-to-add)])
-                    (p/let [uuids (map  blocks-to-add)
-                            store-handler (vector-store/try-create )]
-                      (vector-store/add store-handler embed uuid))
-                    (prn "sematic: transact-blocks!") ;; TODO Junyi
-                    (prn data)
-                    (prn blocks-to-remove-set)
-                    (prn blocks-to-add))
+          addtask-fn (fn [block] (.addTask taskQueue (:uuid block)
+                                       (fn [] ;; Promise factory
+                                         ;; TODO Junyi: Block Chunker
+                                         (p/let [data  {:snippet (gp-util/safe-subs (:content block) 0 20)
+                                                        :page    (:page block)
+                                                        :id      (:id block)}
+                                                 embed (text-encoder/text-encode (:content block) encoder-name)]
+                                           (vector-store/add store-conn embed (:uuid block) data)))))]
+      (mapv addtask-fn blocks-to-add)))
   
   (transact-pages! [_this data]
                    

+ 4 - 4
yarn.lock

@@ -519,10 +519,10 @@
   resolved "https://registry.yarnpkg.com/@logseq/diff-merge/-/diff-merge-0.2.2.tgz#583bd8c8c66d5ff05ea70906475efaa078e839a3"
   integrity sha512-0WeKNhq8PsjvunOqNEd9aSM4tgiClwhonXgXzrQ4KYj8VoyLaEAyEWWGOAoE7mwR+aqwM+bMB4MxuNFywnUb8A==
 
-"@logseq/logmind@^0.1.0":
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/@logseq/logmind/-/logmind-0.1.0.tgz#dda13b967ad0cf7c64b4ad63a24b42790501ff16"
-  integrity sha512-dyoHNDn9/H9r8dlUVnURuBCzPVVnJtUkY/GJiZWYtWkVjbK5LN5ZGN7PCp2BKAKE7Gz+/sHWwIb9yVPNDJP9XQ==
+"@logseq/logmind@^0.1.2":
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/@logseq/logmind/-/logmind-0.1.2.tgz#026eed5cc225f5df1b7d2cc63f665d46f7209c3a"
+  integrity sha512-JIoWslOW2T94YRVCk8HwwBGRZUD1kQks1v+00MHLwRBni/9nw/BjeSuEmOPhYb2WLBELRmwprqtddyQM2Kvqkw==
   dependencies:
     "@xenova/transformers" "^2.3.0"
     compromise "^14.8.0"