| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 | 
							- package strmatcher
 
- import (
 
- 	"container/list"
 
- )
 
- const validCharCount = 53
 
- type MatchType struct {
 
- 	matchType Type
 
- 	exist     bool
 
- }
 
- const (
 
- 	TrieEdge bool = true
 
- 	FailEdge bool = false
 
- )
 
- type Edge struct {
 
- 	edgeType bool
 
- 	nextNode int
 
- }
 
- type ACAutomaton struct {
 
- 	trie   [][validCharCount]Edge
 
- 	fail   []int
 
- 	exists []MatchType
 
- 	count  int
 
- }
 
- func newNode() [validCharCount]Edge {
 
- 	var s [validCharCount]Edge
 
- 	for i := range s {
 
- 		s[i] = Edge{
 
- 			edgeType: FailEdge,
 
- 			nextNode: 0,
 
- 		}
 
- 	}
 
- 	return s
 
- }
 
- var char2Index = []int{
 
- 	'A':  0,
 
- 	'a':  0,
 
- 	'B':  1,
 
- 	'b':  1,
 
- 	'C':  2,
 
- 	'c':  2,
 
- 	'D':  3,
 
- 	'd':  3,
 
- 	'E':  4,
 
- 	'e':  4,
 
- 	'F':  5,
 
- 	'f':  5,
 
- 	'G':  6,
 
- 	'g':  6,
 
- 	'H':  7,
 
- 	'h':  7,
 
- 	'I':  8,
 
- 	'i':  8,
 
- 	'J':  9,
 
- 	'j':  9,
 
- 	'K':  10,
 
- 	'k':  10,
 
- 	'L':  11,
 
- 	'l':  11,
 
- 	'M':  12,
 
- 	'm':  12,
 
- 	'N':  13,
 
- 	'n':  13,
 
- 	'O':  14,
 
- 	'o':  14,
 
- 	'P':  15,
 
- 	'p':  15,
 
- 	'Q':  16,
 
- 	'q':  16,
 
- 	'R':  17,
 
- 	'r':  17,
 
- 	'S':  18,
 
- 	's':  18,
 
- 	'T':  19,
 
- 	't':  19,
 
- 	'U':  20,
 
- 	'u':  20,
 
- 	'V':  21,
 
- 	'v':  21,
 
- 	'W':  22,
 
- 	'w':  22,
 
- 	'X':  23,
 
- 	'x':  23,
 
- 	'Y':  24,
 
- 	'y':  24,
 
- 	'Z':  25,
 
- 	'z':  25,
 
- 	'!':  26,
 
- 	'$':  27,
 
- 	'&':  28,
 
- 	'\'': 29,
 
- 	'(':  30,
 
- 	')':  31,
 
- 	'*':  32,
 
- 	'+':  33,
 
- 	',':  34,
 
- 	';':  35,
 
- 	'=':  36,
 
- 	':':  37,
 
- 	'%':  38,
 
- 	'-':  39,
 
- 	'.':  40,
 
- 	'_':  41,
 
- 	'~':  42,
 
- 	'0':  43,
 
- 	'1':  44,
 
- 	'2':  45,
 
- 	'3':  46,
 
- 	'4':  47,
 
- 	'5':  48,
 
- 	'6':  49,
 
- 	'7':  50,
 
- 	'8':  51,
 
- 	'9':  52,
 
- }
 
- func NewACAutomaton() *ACAutomaton {
 
- 	ac := new(ACAutomaton)
 
- 	ac.trie = append(ac.trie, newNode())
 
- 	ac.fail = append(ac.fail, 0)
 
- 	ac.exists = append(ac.exists, MatchType{
 
- 		matchType: Full,
 
- 		exist:     false,
 
- 	})
 
- 	return ac
 
- }
 
- func (ac *ACAutomaton) Add(domain string, t Type) {
 
- 	node := 0
 
- 	for i := len(domain) - 1; i >= 0; i-- {
 
- 		idx := char2Index[domain[i]]
 
- 		if ac.trie[node][idx].nextNode == 0 {
 
- 			ac.count++
 
- 			if len(ac.trie) < ac.count+1 {
 
- 				ac.trie = append(ac.trie, newNode())
 
- 				ac.fail = append(ac.fail, 0)
 
- 				ac.exists = append(ac.exists, MatchType{
 
- 					matchType: Full,
 
- 					exist:     false,
 
- 				})
 
- 			}
 
- 			ac.trie[node][idx] = Edge{
 
- 				edgeType: TrieEdge,
 
- 				nextNode: ac.count,
 
- 			}
 
- 		}
 
- 		node = ac.trie[node][idx].nextNode
 
- 	}
 
- 	ac.exists[node] = MatchType{
 
- 		matchType: t,
 
- 		exist:     true,
 
- 	}
 
- 	switch t {
 
- 	case Domain:
 
- 		ac.exists[node] = MatchType{
 
- 			matchType: Full,
 
- 			exist:     true,
 
- 		}
 
- 		idx := char2Index['.']
 
- 		if ac.trie[node][idx].nextNode == 0 {
 
- 			ac.count++
 
- 			if len(ac.trie) < ac.count+1 {
 
- 				ac.trie = append(ac.trie, newNode())
 
- 				ac.fail = append(ac.fail, 0)
 
- 				ac.exists = append(ac.exists, MatchType{
 
- 					matchType: Full,
 
- 					exist:     false,
 
- 				})
 
- 			}
 
- 			ac.trie[node][idx] = Edge{
 
- 				edgeType: TrieEdge,
 
- 				nextNode: ac.count,
 
- 			}
 
- 		}
 
- 		node = ac.trie[node][idx].nextNode
 
- 		ac.exists[node] = MatchType{
 
- 			matchType: t,
 
- 			exist:     true,
 
- 		}
 
- 	default:
 
- 		break
 
- 	}
 
- }
 
- func (ac *ACAutomaton) Build() {
 
- 	queue := list.New()
 
- 	for i := 0; i < validCharCount; i++ {
 
- 		if ac.trie[0][i].nextNode != 0 {
 
- 			queue.PushBack(ac.trie[0][i])
 
- 		}
 
- 	}
 
- 	for {
 
- 		front := queue.Front()
 
- 		if front == nil {
 
- 			break
 
- 		} else {
 
- 			node := front.Value.(Edge).nextNode
 
- 			queue.Remove(front)
 
- 			for i := 0; i < validCharCount; i++ {
 
- 				if ac.trie[node][i].nextNode != 0 {
 
- 					ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode
 
- 					queue.PushBack(ac.trie[node][i])
 
- 				} else {
 
- 					ac.trie[node][i] = Edge{
 
- 						edgeType: FailEdge,
 
- 						nextNode: ac.trie[ac.fail[node]][i].nextNode,
 
- 					}
 
- 				}
 
- 			}
 
- 		}
 
- 	}
 
- }
 
- func (ac *ACAutomaton) Match(s string) bool {
 
- 	node := 0
 
- 	fullMatch := true
 
- 	// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
 
- 	// 2. the match string is through a fail edge. NOT FULL MATCH
 
- 	// 2.1 Through a fail edge, but there exists a valid node. SUBSTR
 
- 	for i := len(s) - 1; i >= 0; i-- {
 
- 		idx := char2Index[s[i]]
 
- 		fullMatch = fullMatch && ac.trie[node][idx].edgeType
 
- 		node = ac.trie[node][idx].nextNode
 
- 		switch ac.exists[node].matchType {
 
- 		case Substr:
 
- 			return true
 
- 		case Domain:
 
- 			if fullMatch {
 
- 				return true
 
- 			}
 
- 		default:
 
- 			break
 
- 		}
 
- 	}
 
- 	return fullMatch && ac.exists[node].exist
 
- }
 
 
  |