2021-04-18 08:21:17 +03:00
|
|
|
package strmatcher
|
|
|
|
|
|
|
|
import (
|
|
|
|
"container/list"
|
|
|
|
)
|
|
|
|
|
|
|
|
// validCharCount is the number of edge slots in every trie node; the slot
// numbers stored in char2Index range over 0..validCharCount-1.
const validCharCount = 53
|
|
|
|
|
|
|
|
type MatchType struct {
|
|
|
|
matchType Type
|
|
|
|
exist bool
|
|
|
|
}
|
|
|
|
|
|
|
|
// Edge kinds stored in Edge.edgeType.
const (
	// TrieEdge marks a transition created by Add while inserting a pattern.
	TrieEdge bool = true
	// FailEdge marks a transition that does not belong to an inserted
	// pattern: the initial state of every slot, and the fallback
	// transitions filled in by Build.
	FailEdge bool = false
)
|
|
|
|
|
|
|
|
// Edge is a single outgoing transition of a trie node.
type Edge struct {
	edgeType bool // TrieEdge or FailEdge
	nextNode int  // index of the target node; 0 is the root
}
|
|
|
|
|
|
|
|
type ACAutomaton struct {
|
|
|
|
trie [][validCharCount]Edge
|
|
|
|
fail []int
|
|
|
|
exists []MatchType
|
|
|
|
count int
|
|
|
|
}
|
|
|
|
|
|
|
|
func newNode() [validCharCount]Edge {
|
|
|
|
var s [validCharCount]Edge
|
|
|
|
for i := range s {
|
|
|
|
s[i] = Edge{
|
|
|
|
edgeType: FailEdge,
|
|
|
|
nextNode: 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
// char2Index maps a byte to its edge slot in a trie node. Letters are folded
// so that upper and lower case share a slot, which makes matching
// case-insensitive.
//
// The table has 256 entries so that it can be indexed with any byte value
// without going out of range.
//
// NOTE(review): bytes that are not listed map to slot 0 and are therefore
// indistinguishable from 'a'/'A'. Giving them a slot of their own requires
// changing validCharCount as well.
var char2Index = []int{
	// Letters, case-folded.
	'A': 0,
	'a': 0,
	'B': 1,
	'b': 1,
	'C': 2,
	'c': 2,
	'D': 3,
	'd': 3,
	'E': 4,
	'e': 4,
	'F': 5,
	'f': 5,
	'G': 6,
	'g': 6,
	'H': 7,
	'h': 7,
	'I': 8,
	'i': 8,
	'J': 9,
	'j': 9,
	'K': 10,
	'k': 10,
	'L': 11,
	'l': 11,
	'M': 12,
	'm': 12,
	'N': 13,
	'n': 13,
	'O': 14,
	'o': 14,
	'P': 15,
	'p': 15,
	'Q': 16,
	'q': 16,
	'R': 17,
	'r': 17,
	'S': 18,
	's': 18,
	'T': 19,
	't': 19,
	'U': 20,
	'u': 20,
	'V': 21,
	'v': 21,
	'W': 22,
	'w': 22,
	'X': 23,
	'x': 23,
	'Y': 24,
	'y': 24,
	'Z': 25,
	'z': 25,
	// Punctuation.
	'!':  26,
	'$':  27,
	'&':  28,
	'\'': 29,
	'(':  30,
	')':  31,
	'*':  32,
	'+':  33,
	',':  34,
	';':  35,
	'=':  36,
	':':  37,
	'%':  38,
	'-':  39,
	'.':  40,
	'_':  41,
	'~':  42,
	// Digits.
	'0': 43,
	'1': 44,
	'2': 45,
	'3': 46,
	'4': 47,
	'5': 48,
	'6': 49,
	'7': 50,
	'8': 51,
	'9': 52,
	// Highest byte value: extends the table to 256 entries. Without it the
	// slice ends at '~' (126) and indexing with a byte >= 127 panics.
	0xFF: 0,
}
|
|
|
|
|
|
|
|
func NewACAutomaton() *ACAutomaton {
|
2021-10-19 19:57:14 +03:00
|
|
|
ac := new(ACAutomaton)
|
2021-04-18 08:21:17 +03:00
|
|
|
ac.trie = append(ac.trie, newNode())
|
|
|
|
ac.fail = append(ac.fail, 0)
|
|
|
|
ac.exists = append(ac.exists, MatchType{
|
|
|
|
matchType: Full,
|
|
|
|
exist: false,
|
|
|
|
})
|
|
|
|
return ac
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ac *ACAutomaton) Add(domain string, t Type) {
|
2021-10-19 19:57:14 +03:00
|
|
|
node := 0
|
2021-04-18 08:21:17 +03:00
|
|
|
for i := len(domain) - 1; i >= 0; i-- {
|
2021-10-19 19:57:14 +03:00
|
|
|
idx := char2Index[domain[i]]
|
2021-04-18 08:21:17 +03:00
|
|
|
if ac.trie[node][idx].nextNode == 0 {
|
|
|
|
ac.count++
|
|
|
|
if len(ac.trie) < ac.count+1 {
|
|
|
|
ac.trie = append(ac.trie, newNode())
|
|
|
|
ac.fail = append(ac.fail, 0)
|
|
|
|
ac.exists = append(ac.exists, MatchType{
|
|
|
|
matchType: Full,
|
|
|
|
exist: false,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
ac.trie[node][idx] = Edge{
|
|
|
|
edgeType: TrieEdge,
|
|
|
|
nextNode: ac.count,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
node = ac.trie[node][idx].nextNode
|
|
|
|
}
|
|
|
|
ac.exists[node] = MatchType{
|
|
|
|
matchType: t,
|
|
|
|
exist: true,
|
|
|
|
}
|
|
|
|
switch t {
|
|
|
|
case Domain:
|
|
|
|
ac.exists[node] = MatchType{
|
|
|
|
matchType: Full,
|
|
|
|
exist: true,
|
|
|
|
}
|
2021-10-19 19:57:14 +03:00
|
|
|
idx := char2Index['.']
|
2021-04-18 08:21:17 +03:00
|
|
|
if ac.trie[node][idx].nextNode == 0 {
|
|
|
|
ac.count++
|
|
|
|
if len(ac.trie) < ac.count+1 {
|
|
|
|
ac.trie = append(ac.trie, newNode())
|
|
|
|
ac.fail = append(ac.fail, 0)
|
|
|
|
ac.exists = append(ac.exists, MatchType{
|
|
|
|
matchType: Full,
|
|
|
|
exist: false,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
ac.trie[node][idx] = Edge{
|
|
|
|
edgeType: TrieEdge,
|
|
|
|
nextNode: ac.count,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
node = ac.trie[node][idx].nextNode
|
|
|
|
ac.exists[node] = MatchType{
|
|
|
|
matchType: t,
|
|
|
|
exist: true,
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ac *ACAutomaton) Build() {
|
2021-10-19 19:57:14 +03:00
|
|
|
queue := list.New()
|
2021-04-18 08:21:17 +03:00
|
|
|
for i := 0; i < validCharCount; i++ {
|
|
|
|
if ac.trie[0][i].nextNode != 0 {
|
|
|
|
queue.PushBack(ac.trie[0][i])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for {
|
2021-10-19 19:57:14 +03:00
|
|
|
front := queue.Front()
|
2021-04-18 08:21:17 +03:00
|
|
|
if front == nil {
|
|
|
|
break
|
|
|
|
} else {
|
2021-10-19 19:57:14 +03:00
|
|
|
node := front.Value.(Edge).nextNode
|
2021-04-18 08:21:17 +03:00
|
|
|
queue.Remove(front)
|
|
|
|
for i := 0; i < validCharCount; i++ {
|
|
|
|
if ac.trie[node][i].nextNode != 0 {
|
|
|
|
ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode
|
|
|
|
queue.PushBack(ac.trie[node][i])
|
|
|
|
} else {
|
|
|
|
ac.trie[node][i] = Edge{
|
|
|
|
edgeType: FailEdge,
|
|
|
|
nextNode: ac.trie[ac.fail[node]][i].nextNode,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ac *ACAutomaton) Match(s string) bool {
|
2021-10-19 19:57:14 +03:00
|
|
|
node := 0
|
|
|
|
fullMatch := true
|
2021-04-18 08:21:17 +03:00
|
|
|
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
|
|
|
// 2. the match string is through a fail edge. NOT FULL MATCH
|
|
|
|
// 2.1 Through a fail edge, but there exists a valid node. SUBSTR
|
|
|
|
for i := len(s) - 1; i >= 0; i-- {
|
2021-10-19 19:57:14 +03:00
|
|
|
idx := char2Index[s[i]]
|
2021-04-18 08:21:17 +03:00
|
|
|
fullMatch = fullMatch && ac.trie[node][idx].edgeType
|
|
|
|
node = ac.trie[node][idx].nextNode
|
|
|
|
switch ac.exists[node].matchType {
|
|
|
|
case Substr:
|
|
|
|
return true
|
|
|
|
case Domain:
|
|
|
|
if fullMatch {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return fullMatch && ac.exists[node].exist
|
|
|
|
}
|