search_fuzzy.cljs 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. (ns frontend.common.search-fuzzy
  2. "fuzzy search. Used by frontend and worker namespaces"
  3. (:require [clojure.string :as string]
  4. [cljs-bean.core :as bean]
  5. ["remove-accents" :as removeAccents]))
  6. (def MAX-STRING-LENGTH 1000.0)
  7. (defn clean-str
  8. [s]
  9. (string/replace (string/lower-case s) #"[\[ \\/_\]\(\)]+" ""))
  10. (defn char-array
  11. [s]
  12. (bean/->js (seq s)))
  13. ;; Copied from https://gist.github.com/vaughnd/5099299
  14. (defn str-len-distance
  15. ;; normalized multiplier 0-1
  16. ;; measures length distance between strings.
  17. ;; 1 = same length
  18. [s1 s2]
  19. (let [c1 (count s1)
  20. c2 (count s2)
  21. maxed (max c1 c2)
  22. mined (min c1 c2)]
  23. (double (- 1
  24. (/ (- maxed mined)
  25. maxed)))))
  26. (defn score
  27. [oquery ostr]
  28. (let [query (clean-str oquery)
  29. str (clean-str ostr)]
  30. (loop [q (seq (char-array query))
  31. s (seq (char-array str))
  32. mult 1
  33. idx MAX-STRING-LENGTH
  34. score 0]
  35. (cond
  36. ;; add str-len-distance to score, so strings with matches in same position get sorted by length
  37. ;; boost score if we have an exact match including punctuation
  38. (empty? q) (+ score
  39. (str-len-distance query str)
  40. (if (<= 0 (.indexOf ostr oquery)) MAX-STRING-LENGTH 0))
  41. (empty? s) 0
  42. :else (if (= (first q) (first s))
  43. (recur (rest q)
  44. (rest s)
  45. (inc mult) ;; increase the multiplier as more query chars are matched
  46. (dec idx) ;; decrease idx so score gets lowered the further into the string we match
  47. (+ mult score)) ;; score for this match is current multiplier * idx
  48. (recur q
  49. (rest s)
  50. 1 ;; when there is no match, reset multiplier to one
  51. (dec idx)
  52. (- score 0.1)))))))
  53. (defn search-normalize
  54. "Normalize string for searching (loose)"
  55. [s remove-accents?]
  56. (when s
  57. (let [normalize-str (.normalize (string/lower-case s) "NFKC")]
  58. (if remove-accents?
  59. (removeAccents normalize-str)
  60. normalize-str))))
  61. (defn fuzzy-search
  62. [data query & {:keys [limit extract-fn]
  63. :or {limit 20}}]
  64. (let [query (search-normalize query true)]
  65. (->> (take limit
  66. (sort-by :score (comp - compare)
  67. (filter #(< 0 (:score %))
  68. (for [item data]
  69. (let [s (str (if extract-fn (extract-fn item) item))]
  70. {:data item
  71. :score (score query (search-normalize s true))})))))
  72. (map :data))))