structGSA{ int len[MAXN]; // 节点长度 int link[MAXN]; // 后缀链接,link int next[MAXN][CHAR_NUM]; // 转移 int tot; // 节点总数:[0, tot)
intinsertSAM(int last, int c){ int cur = next[last][c]; len[cur] = len[last] + 1; int p = link[last]; while (p != -1) { if (!next[p][c]) next[p][c] = cur; elsebreak; p = link[p]; } if (p == -1) { link[cur] = 0; return cur; } int q = next[p][c]; if (len[p] + 1 == len[q]) { link[cur] = q; return cur; } int clone = tot++; for (int i = 0; i < CHAR_NUM; ++i) next[clone][i] = len[next[q][i]] != 0 ? next[q][i] : 0; len[clone] = len[p] + 1; while (p != -1 && next[p][c] == q) { next[p][c] = clone; p = link[p]; } link[clone] = link[q]; link[cur] = clone; link[q] = clone;
return cur; }
voidbuild(){ queue<pair<int, int>> q; for (int i = 0; i < 26; ++i) if (next[0][i]) q.push({i, 0}); while (!q.empty()) { auto item = q.front(); q.pop(); auto last = insertSAM(item.second, item.first); for (int i = 0; i < 26; ++i) if (next[last][i]) q.push({i, last}); } } }
由于整个 $BFS$ 的过程得到的顺序,其父节点始终在变化,所以并不需要保存 last 指针。
插入操作中,int cur = next[last][c]; 与正常后缀自动机的 int cur = tot++; 有差异,因为我们插入的节点已经在树型结构中完成了,所以只需要直接获取即可
structexSAM { int len[MAXN]; // 节点长度 int link[MAXN]; // 后缀链接,link int next[MAXN][CHAR_NUM]; // 转移 int tot; // 节点总数:[0, tot)
voidinit(){ tot = 1; link[0] = -1; }
intinsertSAM(int last, int c){ int cur = next[last][c]; if (len[cur]) return cur; len[cur] = len[last] + 1; int p = link[last]; while (p != -1) { if (!next[p][c]) next[p][c] = cur; elsebreak; p = link[p]; } if (p == -1) { link[cur] = 0; return cur; } int q = next[p][c]; if (len[p] + 1 == len[q]) { link[cur] = q; return cur; } int clone = tot++; for (int i = 0; i < CHAR_NUM; ++i) next[clone][i] = len[next[q][i]] != 0 ? next[q][i] : 0; len[clone] = len[p] + 1; while (p != -1 && next[p][c] == q) { next[p][c] = clone; p = link[p]; } link[clone] = link[q]; link[cur] = clone; link[q] = clone;
return cur; }
intinsertTrie(int cur, int c){ if (next[cur][c]) return next[cur][c]; return next[cur][c] = tot++; }
voidinsert(const string &s){ int root = 0; for (auto ch : s) root = insertTrie(root, ch - 'a'); }
voidinsert(constchar *s, int n){ int root = 0; for (int i = 0; i < n; ++i) root = insertTrie(root, s[i] - 'a'); }
voidbuild(){ queue<pair<int, int>> q; for (int i = 0; i < 26; ++i) if (next[0][i]) q.push({i, 0}); while (!q.empty()) { auto item = q.front(); q.pop(); auto last = insertSAM(item.second, item.first); for (int i = 0; i < 26; ++i) if (next[last][i]) q.push({i, last}); } } } exSam;
char s[1000100];
intmain(){ int n; cin >> n; exSam.init(); for (int i = 0; i < n; ++i) { cin >> s; int len = strlen(s); exSam.insert(s, len); } exSam.build();
longlong ans = 0; for (int i = 1; i < exSam.tot; ++i) { ans += exSam.len[i] - exSam.len[exSam.link[i]]; } cout << ans << endl; }
多个字符串间的最长公共子串
我们需要对每个节点建立一个长度为 $k$ 的数组 flag(对于本题而言,可以仅为标记数组,若需要求出此子串的个数,则需要改成计数数组) 在字典树插入字符串时,对所有节点进行计数,保存在当前字符串所在的数组 然后按照 len 递减的顺序遍历,通过后缀链接将当前节点的 flag 与其他节点的合并 遍历所有的节点,找到一个 len 最大且满足对于所有的 k ,其 flag 的值均为非 $0$ 的节点,此节点的 $len$ 即为解
structexSAM { int len[MAXN]; // 节点长度 int link[MAXN]; // 后缀链接,link int next[MAXN][CHAR_NUM]; // 转移 int tot; // 节点总数:[0, tot)
int lenSorted[MAXN]; // 按照 len 排序后的数组,仅排序 [1, tot) 部分,最终下标范围 [0, tot - 1) int sizeC[MAXN][NUM]; // 表示某个字符串的子串个数 int curString; // 字符串实际个数 /** * 计数排序使用的辅助空间数组 */ int lc[MAXN]; // 统计个数
voidinit(){ tot = 1; link[0] = -1; }
intinsertSAM(int last, int c){ int cur = next[last][c]; len[cur] = len[last] + 1; int p = link[last]; while (p != -1) { if (!next[p][c]) next[p][c] = cur; elsebreak; p = link[p]; } if (p == -1) { link[cur] = 0; return cur; } int q = next[p][c]; if (len[p] + 1 == len[q]) { link[cur] = q; return cur; } int clone = tot++; for (int i = 0; i < CHAR_NUM; ++i) next[clone][i] = len[next[q][i]] != 0 ? next[q][i] : 0; len[clone] = len[p] + 1; while (p != -1 && next[p][c] == q) { next[p][c] = clone; p = link[p]; } link[clone] = link[q]; link[cur] = clone; link[q] = clone;
return cur; }
intinsertTrie(int cur, int c){ if (!next[cur][c]) next[cur][c] = tot++; sizeC[next[cur][c]][curString]++; return next[cur][c]; }
voidinsert(const string &s){ int root = 0; for (auto ch : s) root = insertTrie(root, ch - 'a'); curString++; }
voidinsert(constchar *s, int n){ int root = 0; for (int i = 0; i < n; ++i) root = insertTrie(root, s[i] - 'a'); curString++; }
voidbuild(){ queue<pair<int, int>> q; for (int i = 0; i < 26; ++i) if (next[0][i]) q.push({i, 0}); while (!q.empty()) { auto item = q.front(); q.pop(); auto last = insertSAM(item.second, item.first); for (int i = 0; i < 26; ++i) if (next[last][i]) q.push({i, last}); } }
voidsortLen(){ for (int i = 1; i < tot; ++i) lc[i] = 0; for (int i = 1; i < tot; ++i) lc[len[i]]++; for (int i = 2; i < tot; ++i) lc[i] += lc[i - 1]; for (int i = 1; i < tot; ++i) lenSorted[--lc[len[i]]] = i; }
voidgetSizeLen(){ for (int i = tot - 2; i >= 0; --i) for (int j = 0; j < curString; ++j) sizeC[link[lenSorted[i]]][j] += sizeC[lenSorted[i]][j]; }
voiddebug(){ cout << " i len link "; for (int i = 0; i < 26; ++i) cout << " " << (char) ('a' + i); cout << endl; for (int i = 0; i < tot; ++i) { cout << "i: " << setw(3) << i << " len: " << setw(3) << len[i] << " link: " << setw(3) << link[i] << " Next: "; for (int j = 0; j < CHAR_NUM; ++j) { cout << setw(3) << next[i][j]; } cout << endl; } } } exSam;
intmain(){ exSam.init(); string s; while (cin >> s) { exSam.insert(s); } exSam.build(); exSam.sortLen(); exSam.getSizeLen(); int ans = 0; for (int i = 0; i < exSam.tot; ++i) { bool flag = true; for (int j = 0; j < exSam.curString; ++j) { if (!exSam.sizeC[i][j]) { flag = false; break; } } if (flag) ans = max(ans, exSam.len[i]); } cout << ans << endl; }