| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201 |
- package splitter
- import "slices"
- type Splitter struct {
- heads [][]byte
- tails [][]byte
- buffer []byte
- state int
- partialTailPos []int
- kmpNexts [][]int
- impossibleHeads []bool // Track which heads cannot match (inverted logic)
- longestHeadLen int // Cache the longest head length
- }
- func NewSplitter(heads, tails [][]byte) *Splitter {
- kmpNexts := make([][]int, len(tails))
- for i, tail := range tails {
- kmpNexts[i] = computeKMPNext(tail)
- }
- // Find the longest head pattern length
- longestHeadLen := 0
- for _, head := range heads {
- if len(head) > longestHeadLen {
- longestHeadLen = len(head)
- }
- }
- return &Splitter{
- heads: heads,
- tails: tails,
- kmpNexts: kmpNexts,
- partialTailPos: make([]int, len(tails)),
- impossibleHeads: make(
- []bool,
- len(heads),
- ), // Defaults to false (all heads initially possible)
- longestHeadLen: longestHeadLen,
- }
- }
- func computeKMPNext(pattern []byte) []int {
- n := len(pattern)
- next := make([]int, n)
- if n == 0 {
- return next
- }
- next[0] = 0
- for i := 1; i < n; i++ {
- j := next[i-1]
- for j > 0 && pattern[i] != pattern[j] {
- j = next[j-1]
- }
- if pattern[i] == pattern[j] {
- j++
- }
- next[i] = j
- }
- return next
- }
- func (s *Splitter) Process(data []byte) ([]byte, []byte) {
- if len(data) == 0 {
- return nil, nil
- }
- switch s.state {
- case 0:
- s.buffer = append(s.buffer, data...)
- bufLen := len(s.buffer)
- headMatched := false
- headMatchLen := 0
- anyPossibleHead := false
- // Check all heads in a single pass
- for i, head := range s.heads {
- // Skip if this head has already been ruled out
- if s.impossibleHeads[i] {
- continue
- }
- headLen := len(head)
- // Check for complete match
- if bufLen >= headLen {
- if slices.Equal(s.buffer[:headLen], head) {
- headMatched = true
- headMatchLen = headLen
- break
- }
- // Mark this head as impossible to match
- s.impossibleHeads[i] = true
- } else {
- // Check for partial match (potential match)
- matchLen := bufLen
- if slices.Equal(s.buffer[:matchLen], head[:matchLen]) {
- anyPossibleHead = true
- } else {
- // Mark this head as impossible to match
- s.impossibleHeads[i] = true
- }
- }
- }
- if headMatched {
- // Head found, move to seeking tail
- s.state = 1
- s.buffer = s.buffer[headMatchLen:]
- if len(s.buffer) == 0 {
- return nil, nil
- }
- return s.processSeekTail()
- }
- if anyPossibleHead {
- // Need more data to determine if a head matches
- return nil, nil
- }
- // No head matches and no partial match possible, move to done state
- s.state = 2
- remaining := s.buffer
- s.buffer = nil
- return nil, remaining
- case 1:
- s.buffer = append(s.buffer, data...)
- return s.processSeekTail()
- default:
- return nil, data
- }
- }
- func (s *Splitter) processSeekTail() ([]byte, []byte) {
- data := s.buffer
- // Check for each tail pattern
- for tailIdx, tail := range s.tails {
- j := s.partialTailPos[tailIdx]
- tailLen := len(tail)
- kmpNext := s.kmpNexts[tailIdx]
- for i := range data {
- for j > 0 && data[i] != tail[j] {
- j = kmpNext[j-1]
- }
- if data[i] == tail[j] {
- j++
- if j == tailLen {
- end := max(i-tailLen+1, 0)
- result := data[:end]
- remaining := data[i+1:]
- s.buffer = nil
- s.state = 2
- return result, remaining
- }
- }
- }
- // Update partial match position for this tail
- s.partialTailPos[tailIdx] = j
- }
- // Determine how much of the buffer we can safely return
- minSafePos := len(data)
- for _, pos := range s.partialTailPos {
- if pos > 0 {
- // We have a partial match for this tail
- tailMatchLen := pos
- safePos := len(data) - tailMatchLen
- if safePos < minSafePos {
- minSafePos = safePos
- }
- }
- }
- if minSafePos <= 0 {
- // We can't safely return anything yet
- return nil, nil
- }
- result := data[:minSafePos]
- s.buffer = data[minSafePos:]
- return result, nil
- }
|