str.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. package service
  2. import (
  3. "bytes"
  4. "fmt"
  5. "hash/fnv"
  6. "sort"
  7. "strings"
  8. "sync"
  9. goahocorasick "github.com/anknown/ahocorasick"
  10. )
  // SundaySearch reports whether pattern occurs in text, using the Sunday
  // substring-search algorithm.
  //
  // Matching is done on raw bytes, so multi-byte UTF-8 text and patterns are
  // handled correctly: a match of the encoded bytes is a match of the string.
  // An empty pattern is considered to occur in every text, including the empty
  // text.
  func SundaySearch(text string, pattern string) bool {
  	n, m := len(text), len(pattern)
  	if m == 0 {
  		return true
  	}
  	if m > n {
  		return false
  	}

  	// Shift table indexed by byte value. A byte that does not occur in the
  	// pattern lets the window jump past it entirely (m+1). Otherwise the
  	// window moves so that the right-most occurrence of the byte in the
  	// pattern lines up with it; later indices overwrite earlier ones.
  	var shift [256]int
  	for b := range shift {
  		shift[b] = m + 1
  	}
  	for i := 0; i < m; i++ {
  		shift[pattern[i]] = m - i
  	}

  	// i is the offset in text that the pattern is currently aligned with.
  	for i := 0; i <= n-m; {
  		if text[i:i+m] == pattern {
  			return true
  		}
  		// The window already touches the end of text: there is no byte
  		// after it to consult, and no further alignment is possible.
  		if i+m >= n {
  			break
  		}
  		i += shift[text[i+m]]
  	}
  	return false
  }
  // RemoveDuplicate returns a new slice holding the elements of s with repeated
  // values dropped. The first occurrence of each value is kept and the original
  // relative order is preserved. The result is never nil, even for empty input.
  func RemoveDuplicate(s []string) []string {
  	seen := make(map[string]struct{})
  	unique := make([]string, 0, len(s))
  	for i := range s {
  		if _, dup := seen[s[i]]; dup {
  			continue
  		}
  		seen[s[i]] = struct{}{}
  		unique = append(unique, s[i])
  	}
  	return unique
  }
  55. func InitAc(dict []string) *goahocorasick.Machine {
  56. m := new(goahocorasick.Machine)
  57. runes := readRunes(dict)
  58. if err := m.Build(runes); err != nil {
  59. fmt.Println(err)
  60. return nil
  61. }
  62. return m
  63. }
  // acCache maps a dictionary key (as produced by acKey) to the
  // *goahocorasick.Machine built for that dictionary.
  var acCache sync.Map

  // acKey derives a cache key for dict.
  //
  // Each word is trimmed of surrounding whitespace and lowercased; words that
  // end up empty are ignored. The remaining words are sorted, so the key does
  // not depend on the order of dict, and then fed into a 64-bit FNV-1a hash
  // with a zero byte written before every word. The key is the hash in
  // lowercase hexadecimal. An empty string is returned when dict contains no
  // usable word.
  func acKey(dict []string) string {
  	words := make([]string, 0, len(dict))
  	for _, raw := range dict {
  		cleaned := strings.ToLower(strings.TrimSpace(raw))
  		if cleaned == "" {
  			continue
  		}
  		words = append(words, cleaned)
  	}
  	if len(words) == 0 {
  		return ""
  	}

  	sort.Strings(words)

  	h := fnv.New64a()
  	separator := []byte{0}
  	for _, word := range words {
  		h.Write(separator)
  		h.Write([]byte(word))
  	}
  	return fmt.Sprintf("%x", h.Sum64())
  }
  87. func getOrBuildAC(dict []string) *goahocorasick.Machine {
  88. key := acKey(dict)
  89. if key == "" {
  90. return nil
  91. }
  92. if v, ok := acCache.Load(key); ok {
  93. if m, ok2 := v.(*goahocorasick.Machine); ok2 {
  94. return m
  95. }
  96. }
  97. m := InitAc(dict)
  98. if m == nil {
  99. return nil
  100. }
  101. if actual, loaded := acCache.LoadOrStore(key, m); loaded {
  102. if cached, ok := actual.(*goahocorasick.Machine); ok {
  103. return cached
  104. }
  105. }
  106. return m
  107. }
  // readRunes converts the words in dict into the rune slices used to build the
  // Aho-Corasick machine.
  //
  // Each word is lowercased and trimmed of surrounding whitespace. Words that
  // are empty after trimming are skipped, matching how acKey ignores them when
  // computing the cache key, so that dictionaries sharing a key also yield the
  // same keyword set. The order of the remaining words is preserved.
  func readRunes(dict []string) [][]rune {
  	runes := make([][]rune, 0, len(dict))
  	for _, word := range dict {
  		trimmed := bytes.TrimSpace([]byte(strings.ToLower(word)))
  		if len(trimmed) == 0 {
  			continue
  		}
  		runes = append(runes, bytes.Runes(trimmed))
  	}
  	return runes
  }
  117. func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []string) {
  118. if len(dict) == 0 {
  119. return false, nil
  120. }
  121. if len(findText) == 0 {
  122. return false, nil
  123. }
  124. m := getOrBuildAC(dict)
  125. if m == nil {
  126. return false, nil
  127. }
  128. hits := m.MultiPatternSearch([]rune(findText), stopImmediately)
  129. if len(hits) > 0 {
  130. words := make([]string, 0)
  131. for _, hit := range hits {
  132. words = append(words, string(hit.Word))
  133. }
  134. return true, words
  135. }
  136. return false, nil
  137. }