后缀系列例题一

后缀系列例题一,第1张

后缀系列例题一

2022年01月22日,第八天

借一张后缀自动机经典图片镇镇楼……

1. 题目链接:P3804 【模板】后缀自动机 (SAM)

思路:直接上 S A M SAM SAM ,然后做个基数排序倒序遍历求和即可,本质是作后缀树(后缀链接反向即是)的前缀和。换句话说,我们也可以建后缀链接的反向边求树上前缀和,再计算贡献。

#include 
#define ull unsigned long long
#define ll long long
#define re register
#define endl 'n'

using namespace std;

const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 2e6 + 5;

struct node {
    int len, link, ch[26];
}st[N << 1];
int sz, last;
int sum[N], a[N], b[N];
char s[N];

void sam_init () { sz = last = 1; }

void sam_extend (int c) {
    int cur = ++ sz, p = last;
    st[cur].len = st[p].len + 1, last = cur;
    for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur;
    if (! p) st[cur].link = 1;
    else {
        int q = st[p].ch[c];
        if (st[p].len + 1 == st[q].len) st[cur].link = q;
        else {
            int clone = ++ sz;
            st[clone].len = st[p].len + 1;
            st[clone].link = st[q].link;
            memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch));
            for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone;
            st[q].link = st[cur].link = clone;
        }
    }
    sum[cur] = 1;
}

int main() {
    ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
    cin >> s;
    sam_init ();
    for (int i = 0; s[i]; i ++) sam_extend (s[i] - 'a');
    
    for (int i = 1; i <= sz; i ++) b[st[i].len] ++;
    for (int i = 1; i <= sz; i ++) b[i] += b[i - 1];
    for (int i = 1; i <= sz; i ++) a[b[st[i].len] --] = i;

    ll ans = 0;
    for (int i = sz; i >= 1; i --) {
        int p = a[i];
        sum[st[p].link] += sum[p];
        if (sum[p] > 1) 
            ans = max (ans, (ll)sum[p] * st[p].len);
    }
    
    cout << ans << endl;
    return 0;
}
2. 题目链接:SP705 SUBST1 - New Distinct Substrings

下面提供后缀数组和后缀自动机两种思路:

思路1 :后缀数组解法,我们反面考虑,一个长度为 n n n 的字符串有 n × ( n + 1 ) 2 frac{ntimes(n+1)}{2} 2n×(n+1)​ 个子串,现在我们把题目的所要求解的转变为求解其中重复子串个数并去重。这时我们利用后缀数组的 h e i g h t height height 数组解决该问题,考虑把每一个子串都记为原串中某个后缀的某个前缀。

h e i g h t height height 数组的含义是排名第 i i i 名的后缀与第 i − 1 i - 1 i−1 名的后缀的公共前缀的长度。根据我们对子串的记录方式,则在暴力计算子串个数时,这个公共前缀的所有前缀字符串是被重复计算的,所以答案要减去这个公共前缀的长度,即 h e i g h t [ i ] height[i] height[i] 。

记得开 l o n g    l o n g ! ! ! long long!!! long  long!!! 。

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#define ull unsigned long long
#define ll long long
#define re register
#define endl 'n'
using namespace std;

const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 5e5 + 5;

char s[N];
int sa[N], rk[N], height[N];
int x[N], y[N << 1], c[N], n, m;

void get_sa() {
    m = 122;
    for (int i = 1; i <= m; i ++) c[i] = 0;
    for (int i = 1; i <= n; i ++) c[x[i] = s[i]] ++;
    for (int i = 2; i <= m; i ++) c[i] += c[i - 1];
    for (int i = n; i >= 1; i --) sa[c[x[i]] --] = i;
    for (int k = 1; k <= n; k <<= 1) {
        int num = 0;
        for (int i = n - k + 1; i <= n; i ++) y[++ num] = i;
        for (int i = 1; i <= n; i ++) if (sa[i] > k) y[++ num] = sa[i] - k;
        for (int i = 1; i <= m; i ++) c[i] = 0;
        for (int i = 1; i <= n; i ++) c[x[i]] ++;
        for (int i = 2; i <= m; i ++) c[i] += c[i - 1];
        for (int i = n; i >= 1; i --) sa[c[x[y[i]]] --] = y[i], y[i] = 0;
        for (int i = 1; i <= n; i ++) swap(x[i], y[i]);
        x[sa[1]] = num = 1;
        for (int i = 2; i <= n; i ++)
            x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++ num;
        if (num == n) break;
        m = num;
    }
}

void get_height() {
    for (int i = 1; i <= n; i ++) rk[sa[i]] = i;
    for (int i = 1, k = 0; i <= n; i ++) {
        if (rk[i] == 1) continue;
        if (k) k --;
        int j = sa[rk[i] - 1];
        while (i + k <= n && s[i + k] == s[j + k]) k ++;
        height[rk[i]] = k;
    }
}

int main() {
    ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
    int T; cin >> T;
    while (T --) {
        cin >> s + 1;
        n = strlen(s + 1);
        get_sa();
        get_height();
        ll ans = (ll)n * (n + 1) / 2;
        for (int i = 1; i <= n; i ++)
            ans -= height[i];
        cout << ans << 'n';
    }
    return 0;
}

思路2:后缀自动机求解,根据 p a r e n t parent parent 树的性质(也就是后缀链接反向边形成的树),则结点 i i i 到根结点 l i n k [ i ] link[i] link[i] 本质不同的串的个数为 l e n [ i ] − l e n [ l i n k [ i ] ] len[i] - len[link[i]] len[i]−len[link[i]] ,然后就是上板子一边插入字符一边添加贡献即可,缺点相比后缀数组显而易见,空间开销太大了。

#include 
#define ull unsigned long long
#define ll long long
#define re register
#define endl 'n'

using namespace std;

const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 1e5 + 5;

struct node {
    int len, link, ch[26];
}st[N << 1];
ll sz, last, sum;
char s[N];

void sam_init () { 
    for (int i = 0; i <= sz; i ++) {
        st[i].len = st[i].link = 0;
        for (int j = 0; j < 26; j ++) 
            st[i].ch[j] = 0;
    }
    sz = last = 1, sum = 0;
}

void sam_extend (int c) {
    int p = last, cur = ++ sz;
    st[cur].len = st[p].len + 1, last = cur;
    for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur;
    if (! p) st[cur].link = 1;
    else {
        int q = st[p].ch[c];
        if (st[p].len + 1 == st[q].len) st[cur].link = q;
        else {
            int clone = ++ sz;
            st[clone].len = st[p].len + 1;
            st[clone].link = st[q].link;
            memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch));
            for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone;
            st[q].link = st[cur].link = clone;
        }
    }
    sum += st[cur].len - st[st[cur].link].len;
}

int main() {
    ios::sync_with_stdio(false), cin.tie(0), cout.tie(0);
    int T; cin >> T;
    while (T --) {
        cin >> s;
        sum = 0, sam_init ();
        for (int i = 0; s[i]; i ++) sam_extend (s[i] - 'a');
        cout << sum << endl;
    }
    return 0;
}

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5710996.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-17
下一篇 2022-12-17

发表评论

登录后才能评论

评论列表(0条)

保存