splitter.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. package splitter
  2. import "slices"
  3. type Splitter struct {
  4. heads [][]byte
  5. tails [][]byte
  6. buffer []byte
  7. state int
  8. partialTailPos []int
  9. kmpNexts [][]int
  10. impossibleHeads []bool // Track which heads cannot match (inverted logic)
  11. longestHeadLen int // Cache the longest head length
  12. }
  13. func NewSplitter(heads, tails [][]byte) *Splitter {
  14. kmpNexts := make([][]int, len(tails))
  15. for i, tail := range tails {
  16. kmpNexts[i] = computeKMPNext(tail)
  17. }
  18. // Find the longest head pattern length
  19. longestHeadLen := 0
  20. for _, head := range heads {
  21. if len(head) > longestHeadLen {
  22. longestHeadLen = len(head)
  23. }
  24. }
  25. return &Splitter{
  26. heads: heads,
  27. tails: tails,
  28. kmpNexts: kmpNexts,
  29. partialTailPos: make([]int, len(tails)),
  30. impossibleHeads: make(
  31. []bool,
  32. len(heads),
  33. ), // Defaults to false (all heads initially possible)
  34. longestHeadLen: longestHeadLen,
  35. }
  36. }
  37. func computeKMPNext(pattern []byte) []int {
  38. n := len(pattern)
  39. next := make([]int, n)
  40. if n == 0 {
  41. return next
  42. }
  43. next[0] = 0
  44. for i := 1; i < n; i++ {
  45. j := next[i-1]
  46. for j > 0 && pattern[i] != pattern[j] {
  47. j = next[j-1]
  48. }
  49. if pattern[i] == pattern[j] {
  50. j++
  51. }
  52. next[i] = j
  53. }
  54. return next
  55. }
  56. func (s *Splitter) Process(data []byte) ([]byte, []byte) {
  57. if len(data) == 0 {
  58. return nil, nil
  59. }
  60. switch s.state {
  61. case 0:
  62. s.buffer = append(s.buffer, data...)
  63. bufLen := len(s.buffer)
  64. headMatched := false
  65. headMatchLen := 0
  66. anyPossibleHead := false
  67. // Check all heads in a single pass
  68. for i, head := range s.heads {
  69. // Skip if this head has already been ruled out
  70. if s.impossibleHeads[i] {
  71. continue
  72. }
  73. headLen := len(head)
  74. // Check for complete match
  75. if bufLen >= headLen {
  76. if slices.Equal(s.buffer[:headLen], head) {
  77. headMatched = true
  78. headMatchLen = headLen
  79. break
  80. }
  81. // Mark this head as impossible to match
  82. s.impossibleHeads[i] = true
  83. } else {
  84. // Check for partial match (potential match)
  85. matchLen := bufLen
  86. if slices.Equal(s.buffer[:matchLen], head[:matchLen]) {
  87. anyPossibleHead = true
  88. } else {
  89. // Mark this head as impossible to match
  90. s.impossibleHeads[i] = true
  91. }
  92. }
  93. }
  94. if headMatched {
  95. // Head found, move to seeking tail
  96. s.state = 1
  97. s.buffer = s.buffer[headMatchLen:]
  98. if len(s.buffer) == 0 {
  99. return nil, nil
  100. }
  101. return s.processSeekTail()
  102. }
  103. if anyPossibleHead {
  104. // Need more data to determine if a head matches
  105. return nil, nil
  106. }
  107. // No head matches and no partial match possible, move to done state
  108. s.state = 2
  109. remaining := s.buffer
  110. s.buffer = nil
  111. return nil, remaining
  112. case 1:
  113. s.buffer = append(s.buffer, data...)
  114. return s.processSeekTail()
  115. default:
  116. return nil, data
  117. }
  118. }
  119. func (s *Splitter) processSeekTail() ([]byte, []byte) {
  120. data := s.buffer
  121. // Check for each tail pattern
  122. for tailIdx, tail := range s.tails {
  123. j := s.partialTailPos[tailIdx]
  124. tailLen := len(tail)
  125. kmpNext := s.kmpNexts[tailIdx]
  126. for i := range data {
  127. for j > 0 && data[i] != tail[j] {
  128. j = kmpNext[j-1]
  129. }
  130. if data[i] == tail[j] {
  131. j++
  132. if j == tailLen {
  133. end := max(i-tailLen+1, 0)
  134. result := data[:end]
  135. remaining := data[i+1:]
  136. s.buffer = nil
  137. s.state = 2
  138. return result, remaining
  139. }
  140. }
  141. }
  142. // Update partial match position for this tail
  143. s.partialTailPos[tailIdx] = j
  144. }
  145. // Determine how much of the buffer we can safely return
  146. minSafePos := len(data)
  147. for _, pos := range s.partialTailPos {
  148. if pos > 0 {
  149. // We have a partial match for this tail
  150. tailMatchLen := pos
  151. safePos := len(data) - tailMatchLen
  152. if safePos < minSafePos {
  153. minSafePos = safePos
  154. }
  155. }
  156. }
  157. if minSafePos <= 0 {
  158. // We can't safely return anything yet
  159. return nil, nil
  160. }
  161. result := data[:minSafePos]
  162. s.buffer = data[minSafePos:]
  163. return result, nil
  164. }