| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- package service
- import (
- "bytes"
- "fmt"
- "hash/fnv"
- "sort"
- "strings"
- "sync"
- goahocorasick "github.com/anknown/ahocorasick"
- )
// SundaySearch reports whether pattern occurs anywhere in text, using the
// Sunday (Quick Search) substring algorithm over raw bytes.
//
// The comparison is byte-wise, so the shift table is indexed by byte too.
// Keying the table by rune while probing it with single bytes makes every
// multi-byte UTF-8 lead byte look absent from the pattern, which over-shifts
// the window and skips genuine matches in non-ASCII text.
//
// An empty pattern matches any text (including the empty string); a pattern
// longer than text never matches.
func SundaySearch(text string, pattern string) bool {
	n, m := len(text), len(pattern)
	if m == 0 {
		return true
	}
	if m > n {
		return false
	}

	// shift[b] is how far the window may slide when b is the byte immediately
	// after the current window: m+1 when b does not occur in pattern,
	// otherwise m minus the index of b's last occurrence in pattern.
	var shift [256]int
	for b := range shift {
		shift[b] = m + 1
	}
	for i := 0; i < m; i++ {
		shift[pattern[i]] = m - i
	}

	// i is the byte offset in text that the pattern is currently aligned to.
	for i := 0; i <= n-m; {
		if text[i:i+m] == pattern {
			return true
		}
		// The window already touches the end of text; nothing left to probe.
		if i+m >= n {
			break
		}
		i += shift[text[i+m]]
	}
	return false
}
// RemoveDuplicate returns a new slice containing the elements of s with
// repeated values dropped. The first occurrence of each value is kept and the
// relative order of the survivors is preserved; s itself is not modified.
// The result is never nil, even for a nil or empty input.
func RemoveDuplicate(s []string) []string {
	seen := make(map[string]struct{}, len(s))
	unique := make([]string, 0, len(s))
	for _, v := range s {
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{}
		unique = append(unique, v)
	}
	return unique
}
- func InitAc(dict []string) *goahocorasick.Machine {
- m := new(goahocorasick.Machine)
- runes := readRunes(dict)
- if err := m.Build(runes); err != nil {
- fmt.Println(err)
- return nil
- }
- return m
- }
// acCache holds built Aho-Corasick machines keyed by the dictionary
// fingerprint returned by acKey, so that getOrBuildAC does not rebuild the
// machine for a dictionary it has already seen. It is safe for concurrent use.
//
// NOTE(review): nothing in the visible code ever removes an entry, so the
// cache grows with the number of distinct dictionaries.
var acCache sync.Map
// acKey derives a cache key for dict: the hex-encoded 64-bit FNV-1a hash of
// its words after each one has been trimmed and lower-cased, blank words have
// been dropped, and the remainder has been sorted. Dictionaries that differ
// only in word order, letter case, surrounding whitespace, or blank entries
// therefore share a key.
//
// It returns "" when dict contains no non-blank word.
//
// NOTE(review): the key is only a 64-bit hash, so two different dictionaries
// could in principle collide.
func acKey(dict []string) string {
	words := make([]string, 0, len(dict))
	for _, raw := range dict {
		if w := strings.ToLower(strings.TrimSpace(raw)); w != "" {
			words = append(words, w)
		}
	}
	if len(words) == 0 {
		return ""
	}
	sort.Strings(words)

	// A zero byte is hashed before every word so that word boundaries are
	// part of the digest: {"ab", "c"} and {"a", "bc"} must not hash alike.
	sep := []byte{0}
	h := fnv.New64a()
	for _, w := range words {
		h.Write(sep)
		h.Write([]byte(w))
	}
	return fmt.Sprintf("%x", h.Sum64())
}
- func getOrBuildAC(dict []string) *goahocorasick.Machine {
- key := acKey(dict)
- if key == "" {
- return nil
- }
- if v, ok := acCache.Load(key); ok {
- if m, ok2 := v.(*goahocorasick.Machine); ok2 {
- return m
- }
- }
- m := InitAc(dict)
- if m == nil {
- return nil
- }
- if actual, loaded := acCache.LoadOrStore(key, m); loaded {
- if cached, ok := actual.(*goahocorasick.Machine); ok {
- return cached
- }
- }
- return m
- }
// readRunes converts every word of dict into its lower-cased,
// whitespace-trimmed rune sequence, ready to be passed to the Aho-Corasick
// builder.
//
// Words that are empty after trimming are skipped. acKey ignores such words
// when computing the cache key, so skipping them here keeps the built machine
// consistent with its key and avoids feeding empty patterns to the builder.
// The result has length zero when dict has no usable word.
func readRunes(dict []string) [][]rune {
	runes := make([][]rune, 0, len(dict))
	for _, word := range dict {
		trimmed := bytes.TrimSpace([]byte(strings.ToLower(word)))
		if len(trimmed) == 0 {
			continue
		}
		runes = append(runes, bytes.Runes(trimmed))
	}
	return runes
}
- func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []string) {
- if len(dict) == 0 {
- return false, nil
- }
- if len(findText) == 0 {
- return false, nil
- }
- m := getOrBuildAC(dict)
- if m == nil {
- return false, nil
- }
- hits := m.MultiPatternSearch([]rune(findText), stopImmediately)
- if len(hits) > 0 {
- words := make([]string, 0)
- for _, hit := range hits {
- words = append(words, string(hit.Word))
- }
- return true, words
- }
- return false, nil
- }
|