-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSuffix array.cpp
More file actions
135 lines (132 loc) · 2.85 KB
/
Suffix array.cpp
File metadata and controls
135 lines (132 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include<bits/stdc++.h>
using namespace std;
#define MAX 1000010
#define SZ(a) (int)a.size()
string text;
int revSA[MAX],SA[MAX];
int cnt[MAX] , nxt[MAX];
bool bh[MAX],b2h[MAX];
int lcp[MAX];
bool cmp(int i,int j)
{
return text[i]<text[j];
}
void sortFirstChar(int n)
{
/// sort for the first char ...
for(int i =0 ; i<n ; i++)
SA[i] = i;
sort(SA,SA+n ,cmp);
///indentify the bucket ........
for(int i=0 ; i<n ; i++)
{
bh[i] = (i==0 || text[SA[i]]!=text[SA[i-1]]);
b2h[i] = false;
}
}
int CountBucket(int n)
{
int bucket = 0;
for(int i =0 ,j; i<n ; i=j)
{
j = i+1;
while(j<n && bh[j]==false) j++;
nxt[i] = j;
bucket++;
}
return bucket;
}
void SetRank(int n)
{
for(int i = 0 ; i<n ; i=nxt[i])
{
cnt[i] = 0;
for(int j =i ; j<nxt[i] ; j++)
{
revSA[SA[j]] = i;
}
}
}
void findNewRank(int l,int r,int step)
{
for(int j = l ; j<r ; j++)
{
int pre = SA[j] - step;
if(pre>=0)
{
int head = revSA[pre];
revSA[pre] = head+cnt[head]++;
b2h[revSA[pre]] = true;
}
}
}
void findNewBucket(int l,int r,int step)
{
for(int j = l ; j<r ; j++)
{
int pre = SA[j] - step;
if(pre>=0 && b2h[revSA[pre]])
{
for(int k = revSA[pre]+1 ; b2h[k] && !bh[k] ; k++) b2h[k] = false;
}
}
}
void buildSA(int n)
{
///start sorting in logn step ...
sortFirstChar(n);
for(int h =1 ; h<n ; h<<=1)
{
if(CountBucket(n)==n) break;
SetRank(n);
/// cause n-h suffix must be sorted
b2h[revSA[n-h]] = true;
cnt[revSA[n-h]]++;
for(int i = 0 ; i<n ; i=nxt[i])
{
findNewRank(i,nxt[i] , h);
findNewBucket(i , nxt[i] , h);
}
///set the new sorted suffix array ...
for(int i =0 ; i<n ; i++)
{
SA[revSA[i]] = i;
bh[i] |= b2h[i]; ///new bucket ....
}
}
}
void buildLCP(int n)
{
int len = 0;
for(int i = 0 ; i<n ; i++)
revSA[SA[i]] = i;
for(int i =0 ; i< n ; i++)
{
int k = revSA[i];
if(k==0)
{
lcp[k] = 0;
continue;
}
int j = SA[k-1];
while(text[i+len]==text[j+len]) len++;
lcp[k] = len;
if(len) len--;
}
}
void printSA()
{
for(int i=0; i<SZ(text); i++) printf("%d %d %d %s %d\n", i, SA[i],
revSA[SA[i]], text.substr(SA[i]).c_str(), lcp[i]);
puts("");
// for(int i=1;i<SZ(text);i++) printf("%d ",lcp[i]);
puts("");
}
int main ()
{
cin>>text;
buildSA(text.size());
buildLCP(text.size());
printSA();
return 0;
}