SuffixArray
romophic-library
用途
高速な文字列検索を行う. イメージはgrep.
計算量
構築: $ O(N \log N) $
クエリ: $ O(M \log N) $
使い方
構築
SuffixArray sufa(s);
検索
auto res = sufa.lower_upper_bound(t);
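t を接頭辞に持つ接尾辞が, 接尾辞配列上の半開区間 [res.first, res.second) として得られる.
s 中での出現位置は sufa[i] (res.first ≤ i < res.second), 出現回数は res.second - res.first.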
実装(WIP)
/// Suffix array of a string with binary-search substring queries.
///
/// `SA[k]` is the start index in `s` of the k-th suffix in sorted order.
/// Suffixes are ordered by plain `char` comparison, which is the same
/// ordering the query functions below use.
struct SuffixArray {
    std::vector<int> SA;
    const std::string s;

    /// Copies `str` into `s` and builds `SA` for it.
    ///
    /// The suffixes are first ordered by their first character; every round
    /// of the main loop then doubles `len` and regroups/reorders the suffixes.
    SuffixArray(const std::string &str) : s(str) {
        const int n = static_cast<int>(s.size());
        SA.resize(n);
        std::iota(std::begin(SA), std::end(SA), 0);
        // Order by first character; among equal characters the larger start
        // index (i.e. the shorter suffix) comes first.
        std::sort(std::begin(SA), std::end(SA), [&](int a, int b) {
            return s[a] == s[b] ? a > b : s[a] < s[b];
        });
        // classes: group id per start index (built each round)
        // c:       keys from the previous round (initially the raw characters)
        // cnt:     next free slot in SA for each group
        std::vector<int> classes(n), c(s.begin(), s.end()), cnt(n);
        for (int len = 1; len < n; len <<= 1) {
            // A suffix joins the group of its predecessor in SA only if both
            // key pairs in `c` match and the predecessor still has characters
            // beyond `len`; otherwise it opens a new group whose id is its
            // own position in SA.
            for (int i = 0; i < n; i++) {
                if (i > 0 && c[SA[i - 1]] == c[SA[i]] && SA[i - 1] + len < n &&
                    c[SA[i - 1] + len / 2] == c[SA[i] + len / 2]) {
                    classes[SA[i]] = classes[SA[i - 1]];
                } else {
                    classes[SA[i]] = i;
                }
            }
            // Each group starts filling at its own id.
            std::iota(std::begin(cnt), std::end(cnt), 0);
            // Snapshot the current order: SA is overwritten while it is read.
            std::copy(std::begin(SA), std::end(SA), std::begin(c));
            // Walk the snapshot in order and place the suffix that starts
            // `len` characters earlier into the next slot of its group.
            for (int i = 0; i < n; i++) {
                const int s1 = c[i] - len;
                if (s1 >= 0)
                    SA[cnt[classes[s1]]++] = s1;
            }
            // The fresh group ids become the keys of the next round; the
            // snapshot left in `classes` is overwritten by that round.
            classes.swap(c);
        }
    }

    /// Returns the start index in `s` of the k-th suffix in sorted order.
    int operator[](int k) const {
        return SA[k];
    }

    /// Returns the length of the indexed string.
    size_t size() const {
        return s.size();
    }

    /// Returns true iff `s[si..]` is strictly less than `t[ti..]`.
    ///
    /// Characters are compared with plain `char` `<`. When one side is a
    /// prefix of the other, only "s ran out first while t has characters
    /// left" counts as less; a suffix that starts with `t` is not less.
    bool lt_substr(const std::string &t, int si = 0, int ti = 0) const {
        const int sn = static_cast<int>(s.size());
        const int tn = static_cast<int>(t.size());
        while (si < sn && ti < tn) {
            if (s[si] < t[ti])
                return true;
            if (s[si] > t[ti])
                return false;
            ++si, ++ti;
        }
        return si >= sn && ti < tn;
    }

    /// Returns the smallest index k such that the suffix `SA[k]` is not less
    /// than `t`, or `SA.size()` if every suffix is less than `t`.
    int lower_bound(const std::string &t) const {
        // Invariant: suffix at `low` is < t (or low == -1),
        //            suffix at `high` is >= t (or high == SA.size()).
        int low = -1, high = static_cast<int>(SA.size());
        while (high - low > 1) {
            const int mid = (low + high) / 2;
            if (lt_substr(t, SA[mid]))
                low = mid;
            else
                high = mid;
        }
        return high;
    }

    /// Returns the half-open range `[first, second)` of indices into `SA`
    /// whose suffixes start with `t`.
    ///
    /// The range is empty (`first == second`) when `t` does not occur.
    /// An empty `t` matches every suffix and yields `{0, SA.size()}`.
    /// `t` is not modified.
    std::pair<int, int> lower_upper_bound(const std::string &t) const {
        const int idx = lower_bound(t);
        // Every suffix from `idx` on is >= t, so the ones that start with t
        // form one contiguous run beginning at `idx`. Testing the prefix
        // directly avoids incrementing t's last character, which is invalid
        // for an empty t and wraps around for the largest char value.
        int low = idx - 1, high = static_cast<int>(SA.size());
        while (high - low > 1) {
            const int mid = (low + high) / 2;
            if (s.compare(SA[mid], t.size(), t) == 0)
                low = mid;
            else
                high = mid;
        }
        return {idx, high};
    }

    /// Prints every suffix in sorted order to stdout as "<rank>: <suffix>".
    void output() const {
        const int n = static_cast<int>(SA.size());
        for (int i = 0; i < n; i++) {
            std::cout << i << ": " << s.substr(SA[i]) << '\n';
        }
        // Flush once at the end instead of once per line.
        std::cout << std::flush;
    }
};
Verify
//TODO