123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- package strmatcher
- import (
- "container/list"
- )
- const validCharCount = 53
- type MatchType struct {
- matchType Type
- exist bool
- }
- const (
- TrieEdge bool = true
- FailEdge bool = false
- )
- type Edge struct {
- edgeType bool
- nextNode int
- }
- type ACAutomaton struct {
- trie [][validCharCount]Edge
- fail []int
- exists []MatchType
- count int
- }
- func newNode() [validCharCount]Edge {
- var s [validCharCount]Edge
- for i := range s {
- s[i] = Edge{
- edgeType: FailEdge,
- nextNode: 0,
- }
- }
- return s
- }
- var char2Index = []int{
- 'A': 0,
- 'a': 0,
- 'B': 1,
- 'b': 1,
- 'C': 2,
- 'c': 2,
- 'D': 3,
- 'd': 3,
- 'E': 4,
- 'e': 4,
- 'F': 5,
- 'f': 5,
- 'G': 6,
- 'g': 6,
- 'H': 7,
- 'h': 7,
- 'I': 8,
- 'i': 8,
- 'J': 9,
- 'j': 9,
- 'K': 10,
- 'k': 10,
- 'L': 11,
- 'l': 11,
- 'M': 12,
- 'm': 12,
- 'N': 13,
- 'n': 13,
- 'O': 14,
- 'o': 14,
- 'P': 15,
- 'p': 15,
- 'Q': 16,
- 'q': 16,
- 'R': 17,
- 'r': 17,
- 'S': 18,
- 's': 18,
- 'T': 19,
- 't': 19,
- 'U': 20,
- 'u': 20,
- 'V': 21,
- 'v': 21,
- 'W': 22,
- 'w': 22,
- 'X': 23,
- 'x': 23,
- 'Y': 24,
- 'y': 24,
- 'Z': 25,
- 'z': 25,
- '!': 26,
- '$': 27,
- '&': 28,
- '\'': 29,
- '(': 30,
- ')': 31,
- '*': 32,
- '+': 33,
- ',': 34,
- ';': 35,
- '=': 36,
- ':': 37,
- '%': 38,
- '-': 39,
- '.': 40,
- '_': 41,
- '~': 42,
- '0': 43,
- '1': 44,
- '2': 45,
- '3': 46,
- '4': 47,
- '5': 48,
- '6': 49,
- '7': 50,
- '8': 51,
- '9': 52,
- }
- func NewACAutomaton() *ACAutomaton {
- ac := new(ACAutomaton)
- ac.trie = append(ac.trie, newNode())
- ac.fail = append(ac.fail, 0)
- ac.exists = append(ac.exists, MatchType{
- matchType: Full,
- exist: false,
- })
- return ac
- }
- func (ac *ACAutomaton) Add(domain string, t Type) {
- node := 0
- for i := len(domain) - 1; i >= 0; i-- {
- idx := char2Index[domain[i]]
- if ac.trie[node][idx].nextNode == 0 {
- ac.count++
- if len(ac.trie) < ac.count+1 {
- ac.trie = append(ac.trie, newNode())
- ac.fail = append(ac.fail, 0)
- ac.exists = append(ac.exists, MatchType{
- matchType: Full,
- exist: false,
- })
- }
- ac.trie[node][idx] = Edge{
- edgeType: TrieEdge,
- nextNode: ac.count,
- }
- }
- node = ac.trie[node][idx].nextNode
- }
- ac.exists[node] = MatchType{
- matchType: t,
- exist: true,
- }
- switch t {
- case Domain:
- ac.exists[node] = MatchType{
- matchType: Full,
- exist: true,
- }
- idx := char2Index['.']
- if ac.trie[node][idx].nextNode == 0 {
- ac.count++
- if len(ac.trie) < ac.count+1 {
- ac.trie = append(ac.trie, newNode())
- ac.fail = append(ac.fail, 0)
- ac.exists = append(ac.exists, MatchType{
- matchType: Full,
- exist: false,
- })
- }
- ac.trie[node][idx] = Edge{
- edgeType: TrieEdge,
- nextNode: ac.count,
- }
- }
- node = ac.trie[node][idx].nextNode
- ac.exists[node] = MatchType{
- matchType: t,
- exist: true,
- }
- default:
- break
- }
- }
- func (ac *ACAutomaton) Build() {
- queue := list.New()
- for i := 0; i < validCharCount; i++ {
- if ac.trie[0][i].nextNode != 0 {
- queue.PushBack(ac.trie[0][i])
- }
- }
- for {
- front := queue.Front()
- if front == nil {
- break
- } else {
- node := front.Value.(Edge).nextNode
- queue.Remove(front)
- for i := 0; i < validCharCount; i++ {
- if ac.trie[node][i].nextNode != 0 {
- ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode
- queue.PushBack(ac.trie[node][i])
- } else {
- ac.trie[node][i] = Edge{
- edgeType: FailEdge,
- nextNode: ac.trie[ac.fail[node]][i].nextNode,
- }
- }
- }
- }
- }
- }
- func (ac *ACAutomaton) Match(s string) bool {
- node := 0
- fullMatch := true
- // 1. the match string is all through trie edge. FULL MATCH or DOMAIN
- // 2. the match string is through a fail edge. NOT FULL MATCH
- // 2.1 Through a fail edge, but there exists a valid node. SUBSTR
- for i := len(s) - 1; i >= 0; i-- {
- idx := char2Index[s[i]]
- fullMatch = fullMatch && ac.trie[node][idx].edgeType
- node = ac.trie[node][idx].nextNode
- switch ac.exists[node].matchType {
- case Substr:
- return true
- case Domain:
- if fullMatch {
- return true
- }
- default:
- break
- }
- }
- return fullMatch && ac.exists[node].exist
- }
|