1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
| #define AHO_CORASICK_ENABLE_WORDS_COUNT
struct aho_corasick {
static const int ALPHABET_SIZE = 27;
vector<array<int, ALPHABET_SIZE>> trie_nodes; vector<int> fail; #ifdef AHO_CORASICK_ENABLE_SUFFIX_LINK vector<int> last; vector<bool> trie_exists; #endif
#ifdef AHO_CORASICK_FAST_MATCHED vector<bool> trie_matched; #endif
#ifdef AHO_CORASICK_ENABLE_WORDS_COUNT vector<vector<int>> fail_tree; vector<int> trie_count; vector<bool> trie_visited; #endif
aho_corasick() { clear(); } void clear() { trie_nodes.clear(); fail.clear(); #ifdef AHO_CORASICK_ENABLE_SUFFIX_LINK trie_exists.clear(); last.clear(); #endif #ifdef AHO_CORASICK_FAST_MATCHED trie_matched.clear(); #endif #ifdef AHO_CORASICK_ENABLE_WORDS_COUNT fail_tree.clear(); trie_count.clear(); trie_visited.clear(); #endif new_node(); } int new_node() { trie_nodes.push_back({}); fail.push_back(0); #ifdef AHO_CORASICK_ENABLE_SUFFIX_LINK trie_exists.push_back(false); last.push_back(0); #endif #ifdef AHO_CORASICK_FAST_MATCHED trie_matched.push_back(false); #endif #ifdef AHO_CORASICK_ENABLE_WORDS_COUNT fail_tree.emplace_back(); trie_visited.push_back(false); trie_count.push_back(0); #endif return int(trie_nodes.size() - 1); } int insert(const string& s) { int p = 0; for (char c : s) { if (trie_nodes[p][c & 31] == 0) { trie_nodes[p][c & 31] = new_node(); } p = trie_nodes[p][c & 31]; } #ifdef AHO_CORASICK_FAST_MATCHED trie_matched[p] = true; #endif #ifdef AHO_CORASICK_ENABLE_SUFFIX_LINK trie_exists[p] = true; #endif return p; }
void build() { queue<int> q; for (int i = 1; i < ALPHABET_SIZE; ++i) { int u = trie_nodes[0][i]; if (u != 0) { q.push(u); } fail[u] = 0; } while (!q.empty()) { int u = q.front(); q.pop(); #ifdef AHO_CORASICK_FAST_MATCHED trie_matched[u] = trie_matched[fail[u]] || trie_matched[u]; #endif for (int i = 1; i < ALPHABET_SIZE; ++i) { int v = trie_nodes[u][i]; if (v == 0) { trie_nodes[u][i] = trie_nodes[fail[u]][i]; } else { fail[v] = trie_nodes[fail[u]][i]; #ifdef AHO_CORASICK_ENABLE_SUFFIX_LINK last[v] = trie_exists[fail[v]] ? fail[v] : last[fail[v]]; #endif q.push(v); } } } #ifdef AHO_CORASICK_ENABLE_WORDS_COUNT int n = int(trie_nodes.size()); for (int i = 1; i < n; ++i) { fail_tree[fail[i]].push_back(i); } #endif }
#ifdef AHO_CORASICK_ENABLE_WORDS_COUNT void search_count(const string& text) { fill(trie_count.begin(), trie_count.end(), 0); fill(trie_visited.begin(), trie_visited.end(), false); ++trie_count[0]; int p = 0; for (char c : text) { p = trie_nodes[p][c & 31]; ++trie_count[p]; } } int calc_count(int u) { if (trie_visited[u]) { return trie_count[u]; } trie_visited[u] = true; for (int v : fail_tree[u]) { trie_count[u] += calc_count(v); } return trie_count[u]; } #endif };
|