-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
27902fa
commit 71478e7
Showing
2 changed files
with
129 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/* | ||
* suffix array algorithm | ||
* | ||
* Features: a suffix array sorts all the suffixes of a string in O(n*log^2(n)) time, using O(n*log(n)) memory for its rank tables. | ||
* It can also report the length of the longest common prefix (lcp) of two suffixes in O(log(n)) time. | ||
* You can test it by running suffix_array_demo.cpp | ||
* Want to get more detailed information about suffix array? Please google SUFF_AR_ENG.pdf | ||
* | ||
* author: nowerzt@gmail.com | ||
*/ | ||
|
||
#ifndef _SUFFIX_ARRAY_H | ||
#define _SUFFIX_ARRAY_H | ||
|
||
#include <algorithm> | ||
#include <vector> | ||
#include <string> | ||
#include <math.h> | ||
|
||
using namespace std; | ||
|
||
namespace alg {
    /**
     * Suffix array built by prefix doubling.
     *
     * Round k sorts the suffix start positions by their first 2^k characters
     * and records every position's rank in bucket[k]. The ranks of round k-1
     * are the sort keys of round k, and lcp_length() reuses all of them.
     *
     * The object holds a reference to the source string but only reads it
     * while the constructor runs; later queries use the rank tables alone.
     */
    class SuffixArray {
    private:
        // bucket[k][i] is the rank of the suffix starting at i when only the
        // first 2^k characters are compared. Every row has 2*N slots; slots
        // N..2N-1 hold -1 so that a read past the end of the string yields a
        // key smaller than any real rank.
        std::vector<std::vector<int> > bucket;
        // Suffix start positions in sorted order.
        std::vector<int> suffix;
        // N: string length. L: prefix length compared in the current round.
        // K: index of the current round; once construction has finished it
        // equals the number of rounds that were run.
        int N, L, K;
        const std::string& str;

        void suffix_sort();
        void update_bucket();

        // Strict ordering of the suffixes at a and b on their first L
        // characters. Round 0 compares single characters; later rounds
        // compare the pair (rank of first half, rank of second half) taken
        // from the previous round.
        bool less_than(int a, int b) const {
            if (K == 0) return str[a] < str[b];
            const std::vector<int>& prev = bucket[K - 1];
            if (prev[a] != prev[b]) return prev[a] < prev[b];
            const int half = L / 2;
            return prev[a + half] < prev[b + half];
        }

        // True when neither suffix orders before the other in this round.
        bool equal(int a, int b) const {
            return !less_than(a, b) && !less_than(b, a);
        }

    public:
        // Builds the suffix array of s. An empty string is accepted and
        // produces an empty array.
        explicit SuffixArray(const std::string& s)
            : N(static_cast<int>(s.size())), L(0), K(0), str(s) { suffix_sort(); }

        // Returns the start position of the i-th smallest suffix.
        // i must be in [0, N); no bounds check is performed.
        int operator[](int i) const { return suffix[i]; }

        // Given the start positions of two suffixes, returns the length of
        // their longest common prefix. Returns 0 if either position is
        // outside [0, N).
        int lcp_length(int x, int y) const;
    };

    // Runs the doubling rounds. `inline` because this definition lives in a
    // header and may be included by several translation units.
    inline void SuffixArray::suffix_sort() {
        suffix.resize(N);
        for (int i = 0; i < N; ++i) suffix[i] = i;
        // Nothing to sort; also keeps update_bucket() away from suffix[0].
        if (N == 0) return;

        // Count the rounds the loop below will run (ceil(log2(N)) + 1) with
        // integer arithmetic, so no floating-point rounding is involved.
        int rounds = 0;
        for (int len = 1; (len >> 1) < N; len <<= 1) ++rounds;
        bucket.assign(rounds, std::vector<int>(2 * N));

        for (L = 1, K = 0; (L >> 1) < N; L <<= 1, ++K) {
            // Capture `this` by pointer: the comparator stays cheap to copy,
            // unlike a bound copy of the whole object.
            std::sort(suffix.begin(), suffix.end(),
                      [this](int a, int b) { return less_than(a, b); });
            update_bucket();
        }
    }

    // Fills bucket[K] from the freshly sorted `suffix`: suffixes that compare
    // equal in this round share a rank, and ranks grow along the sorted order.
    inline void SuffixArray::update_bucket() {
        std::vector<int>& rank = bucket[K];
        int seq = 0;
        rank[suffix[0]] = 0;
        for (int i = 1; i < N; ++i) {
            if (!equal(suffix[i], suffix[i - 1])) ++seq;
            rank[suffix[i]] = seq;
        }
        // Padding read by less_than() when a + L/2 runs past the string end.
        std::fill(rank.begin() + N, rank.end(), -1);
    }

    // Walks the rank tables from the longest prefix length down to 1. When
    // the ranks at level k match, the next 2^k characters are identical, so
    // both positions advance by 2^k and that length is added to the result.
    inline int SuffixArray::lcp_length(int x, int y) const {
        if (x < 0 || y < 0 || x >= N || y >= N) return 0;
        if (x == y) return N - x;
        int ret = 0;
        for (int k = K - 1; k >= 0 && x < N && y < N; --k) {
            if (bucket[k][x] == bucket[k][y]) {
                const int step = 1 << k;
                x += step;
                y += step;
                ret += step;
            }
        }
        return ret;
    }
}
|
||
#endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#include <iostream> | ||
#include <string> | ||
#include <math.h> | ||
|
||
#include "suffix_array.h" | ||
|
||
using namespace std; | ||
using namespace alg; | ||
|
||
// Writes the characters in the range [b, e) to standard output, one at a
// time, without a trailing newline.
void print(std::string::iterator b, std::string::iterator e) {
    while (b != e) {
        std::cout << *b;
        ++b;
    }
}
|
||
int main() | ||
{ | ||
string str; | ||
while(cin>>str) { | ||
SuffixArray sa(str); | ||
cout<<endl; | ||
cout<<"sorted suffixs are:"<<endl; | ||
for(size_t i=0;i<str.size();i++) { | ||
print(str.begin()+sa[i], str.end()); | ||
cout<<endl; | ||
} | ||
cout<<endl; | ||
cout<<"The length of the longest common prefix of two suffixs "; | ||
int i=rand()%str.size(); | ||
int j=rand()%str.size(); | ||
print(str.begin()+i,str.end()); | ||
cout<<" and "; | ||
print(str.begin()+j,str.end()); | ||
cout<<" is "; | ||
cout<<sa.lcp_length(i,j)<<endl; | ||
cout<<endl; | ||
} | ||
return 0; | ||
} |