Skip to content

Commit

Permalink
Merge pull request simongog#192 from simongog/sd_vector_sel0
Browse files Browse the repository at this point in the history
Faster select_0 support for sd_vector
  • Loading branch information
simongog committed Jun 10, 2014
2 parents ad5c1ae + 49fd6fd commit 44729af
Show file tree
Hide file tree
Showing 3 changed files with 303 additions and 0 deletions.
122 changes: 122 additions & 0 deletions examples/sd_vector_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include <sdsl/bit_vectors.hpp>
#include <random>
#include <iostream>
#include <chrono>

using namespace sdsl;
using namespace std;

using namespace std::chrono;
using timer = std::chrono::high_resolution_clock;


//! Issues `times` queries against `v` and returns the sum of all answers.
/*! \param v     Callable query structure; it is invoked as v(x).
 *  \param rands Pool of pre-generated query arguments.
 *  \param mask  Bit mask applied to the running counter to pick the next
 *               entry of `rands`.
 *  \param times Number of queries to issue.
 *  \return Sum of all query results (wraps around on overflow); returning
 *          it makes the result of every query observable to the caller.
 */
template<class t_vec>
uint64_t test_inv_random_access(const t_vec& v, const int_vector<64>& rands, uint64_t mask, uint64_t times=100000000)
{
    uint64_t checksum = 0;
    uint64_t query_no = 0;
    while (query_no < times) {
        const uint64_t slot = query_no & mask;
        checksum += v(rands[slot]);
        ++query_no;
    }
    return checksum;
}



//int main(int argc, char* argv[]){
//! Benchmark driver for select_0, select_1 and rank_1 on an sd_vector.
/*! Builds a plain bit_vector of 800000000 bits, sets bits at randomly drawn
 *  positions, compresses it into an sd_vector<> and then times
 *    - select_0 via sd_vector<>::select_0_type,
 *    - select_0 via select_0_support_sd,
 *    - select_1 via sd_vector<>::select_1_type,
 *    - rank_1   via sd_vector<>::rank_1_type.
 *  All results are written to standard output.
 */
int main()
{
    // Number of timed queries issued by each benchmark section.
    const uint64_t reps = 10000000;

    auto start = timer::now();
    const bool default_value = false; //ID[ID.length()-1]-'0';
    bit_vector bv(800000000, default_value);

    std::mt19937_64 rng;
    std::uniform_int_distribution<uint64_t> distribution(0, bv.size()-1);
    // A lambda instead of bind(): bind lives in <functional>, which this
    // file does not include. The sequence of drawn values is unchanged.
    auto dice = [&distribution, &rng]() {
        return distribution(rng);
    };
    // populate vectors with some other bits
    // (size/25 draws; positions may repeat, so at most size/25 bits flip)
    const uint64_t draws = bv.size()/25;
    for (uint64_t i=0; i < draws; ++i) {
        const uint64_t x = dice();
        bv[x] = !default_value;
    }
    auto stop = timer::now();
    cout << "initialization in (ms): " << duration_cast<milliseconds>(stop-start).count() << endl;
    cout << "size in MiB: " << size_in_mega_bytes(bv) << endl;

    start = timer::now();
    sd_vector<> bv_sd(bv);
    stop = timer::now();
    cout << "sd_construction in (ms): " << duration_cast<milliseconds>(stop-start).count() << endl;
    {
        // Swap with an empty temporary so the plain bit vector's content
        // is handed to the temporary and destroyed with it.
        bit_vector().swap(bv);
    }
    cout << "size in MiB: " << size_in_mega_bytes(bv_sd) << endl;
    cout << "wl = " << static_cast<size_t>(bv_sd.wl) << endl;
    cout << "n = " << bv_sd.size() << endl;
    cout << "2*m = " << bv_sd.high.size()<<endl;
    cout <<"n/m=" << (2.0*bv_sd.size())/bv_sd.high.size()<<endl;

    const auto zeros = sd_vector<>::rank_0_type(&bv_sd)(bv_sd.size());
    const auto ones  = bv_sd.size()-zeros;
    cout << "zeros = "<< zeros << endl;
    {
        // select_0 through the support type bundled with sd_vector.
        uint64_t mask = 0;
        auto rands = util::rnd_positions<int_vector<64>>(20, mask, zeros, 17);
        // select arguments are 1-based: shift every drawn value up by one
        for (uint64_t i=0; i<rands.size(); ++i) rands[i] = rands[i]+1;
        sd_vector<>::select_0_type select0(&bv_sd);
        start = timer::now();
        auto check = test_inv_random_access(select0, rands, mask, reps);
        stop = timer::now();

        cout << "# select0_time = " << duration_cast<nanoseconds>(stop-start).count()/static_cast<double>(reps) << endl;
        cout << "# select_check = " << check << endl;
        cout << "# size_in_mega_bytes(bv_sd) = " << size_in_mega_bytes(bv_sd) << endl;
        cout << "# size_in_mega_bytes(select0) = " << size_in_mega_bytes(select0) << endl;
    }
    {
        // select_0 through select_0_support_sd, same queries as above
        // (identical arguments to rnd_positions, including the seed).
        uint64_t mask = 0;
        auto rands = util::rnd_positions<int_vector<64>>(20, mask, zeros, 17);
        for (uint64_t i=0; i<rands.size(); ++i) rands[i] = rands[i]+1;
        select_0_support_sd<sd_vector<>> select0(&bv_sd);
        start = timer::now();
        auto check = test_inv_random_access(select0, rands, mask, reps);
        stop = timer::now();

        cout << "# select0_time = " << duration_cast<nanoseconds>(stop-start).count()/static_cast<double>(reps) << endl;
        cout << "# select_check = " << check << endl;
        cout << "# size_in_mega_bytes(bv_sd) = " << size_in_mega_bytes(bv_sd) << endl;
        cout << "# size_in_mega_bytes(select0) = " << size_in_mega_bytes(select0) << endl;
    }
    {
        // select_1 through the support type bundled with sd_vector.
        uint64_t mask = 0;
        auto rands = util::rnd_positions<int_vector<64>>(20, mask, ones, 17);
        for (uint64_t i=0; i<rands.size(); ++i) rands[i] = rands[i]+1;
        sd_vector<>::select_1_type select1(&bv_sd);
        start = timer::now();
        auto check = test_inv_random_access(select1, rands, mask, reps);
        stop = timer::now();

        cout << "# select1_time = " << duration_cast<nanoseconds>(stop-start).count()/static_cast<double>(reps) << endl;
        cout << "# select_check = " << check << endl;
    }
    {
        // rank_1: arguments are drawn with modulus bv_sd.size() and used
        // as-is (no +1 shift, unlike the select sections).
        uint64_t mask = 0;
        auto rands = util::rnd_positions<int_vector<64>>(20, mask, bv_sd.size(), 17);
        cout<<"done"<<endl;
        // Diagnostic output: addresses of the select supports on `high`.
        cout<<reinterpret_cast<uint64_t>(&(bv_sd.high_1_select))<<endl;
        cout<<reinterpret_cast<uint64_t>(&(bv_sd.high_0_select))<<endl;
        sd_vector<>::rank_1_type rank1(&bv_sd);
        cout<<"done"<<endl;
//  for(size_t i=0; i<bv_sd.size();++i){
//      cout << "i="<<i<<" rank1("<<i<<")="<<rank1(i)<<endl;
//  }
        start = timer::now();
        auto check = test_inv_random_access(rank1, rands, mask, reps);
        stop = timer::now();

        cout << "# rank1_time = " << duration_cast<nanoseconds>(stop-start).count()/static_cast<double>(reps) << endl;
        cout << "# select_check = " << check << endl;
    }
}
180 changes: 180 additions & 0 deletions include/sdsl/sd_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,5 +493,185 @@ class select_support_sd
}
};


//! Select_0 data structure for sd_vector
/*! \tparam t_sd_vector sd_vector type
* \tparam t_rank_1 Rank support for high part of sd_vector
*/
template<typename t_sd_vector=sd_vector<>>
class select_0_support_sd
{
public:
typedef bit_vector::size_type size_type;
typedef t_sd_vector bit_vector_type;
using rank_1 = typename t_sd_vector::rank_1_type;
using sel0_type = typename t_sd_vector::select_0_type;
typedef bit_vector y_high_type;
enum { bit_pat = 0 };

private:
const bit_vector_type* m_v;
int_vector<> m_pointer;
int_vector<> m_rank1;
public:

explicit select_0_support_sd(const bit_vector_type* v=nullptr) {
set_vector(v);
if (nullptr != m_v) {
size_type rank_0 = 0; // rank0 in H
const size_type bs = 1ULL << (m_v->wl);
size_type z = 0;
size_type rank1 = 0;// rank1 in H
size_type zeros = m_v->size() - rank_1(m_v)(m_v->size()); // zeros in B
m_pointer = int_vector<>(zeros/(64*bs)+1, 0, bits::hi(m_v->high.size()/64)+1);
m_rank1 = int_vector<>(m_pointer.size(), 0, bits::hi(m_v->high.size())+1);
uint64_t w=0;
for (size_type i=0, sel0=1; i < m_v->high.size(); i+=64) {
size_type old_rank1 = rank1;
w = m_v->high.get_int(i, 64);
rank1 += bits::cnt(w);
rank_0 = (i+64)-rank1;
if (rank1 > 0 and (w>>63)&1) {
uint64_t pos = rank_0*bs + m_v->low[rank1-1]; // pos of last one (of previous block in B
z = pos + 1 - rank1;
} else {
z = rank_0*bs - rank1;
}
while (sel0 <= z and sel0 <= zeros) {
m_pointer[(sel0-1)/(64*bs)] = i/64;
m_rank1[(sel0-1)/(64*bs)] = old_rank1;
sel0 += 64*bs;
}
}
}
}

//! Returns the position of the i-th occurrence in the bit vector.
size_type select(size_type i)const {
const size_type bs = 1ULL << (m_v->wl);
size_type j = m_pointer[(i-1)/(64*bs)]*64;// index into m_high
size_type rank1 = m_rank1[(i-1)/(64*bs)]; // rank_1(j*bs*64) in B
size_type pos = 0;
size_type rank0 = 0;

if (rank1 > 0 and (m_v->high[j-1])&1) {
pos = (j-rank1)*bs + m_v->low[rank1-1]; // starting position of current block
rank0 = pos+1-rank1;
} else {
pos = (j-rank1)*bs;// starting position of current block
rank0 = pos-rank1;
}
uint64_t w = m_v->high.get_int(j, 64);
do {
uint64_t _rank1 = rank1 + bits::cnt(w);
uint64_t _rank0 = 0;
if (_rank1 > 0 and (w>>63)&1) {
pos = (j+64-_rank1)*bs + m_v->low[_rank1-1];
_rank0 = pos+1-_rank1;
} else {
pos = (j+64-_rank1)*bs;
_rank0 = pos-_rank1;
}
if (_rank0 < i) {
j+=64;
w = m_v->high.get_int(j, 64);
rank1 = _rank1;
} else {
break;
}
} while (true);
// invariant i >zeros
do {
uint64_t _rank1 = rank1 + bits::lt_cnt[w&0xFFULL];
uint64_t _rank0 = 0;
if (_rank1 > 0 and (w>>7)&1) {
pos = (j+8-_rank1)*bs + m_v->low[_rank1-1];
_rank0 = pos+1-_rank1;
} else {
pos = (j+8-_rank1)*bs;
_rank0 = pos-_rank1;
}
if (_rank0 < i) {
j+=8;
w >>= 8;
rank1 = _rank1;
} else {
break;
}
} while (true);

do {
bool b = w&1ULL;
w >>= 1; // zeros are shifted in
++j;
if (0 == b) {
pos = (j-rank1)*bs;
size_type zeros = pos-rank1;
if (zeros >= i) {
pos = pos - (zeros-i) - 1;
break;
}
} else {
pos = (j-1-rank1)*bs;
size_type one_pos = pos + m_v->low[rank1];
++rank1;
size_type zeros = one_pos + 1 - rank1;
if (zeros >= i) {
pos = one_pos - (zeros-i) - 1;
break;
}
}
if (j%64==0) {
w = m_v->high.get_int(j,64);
}
} while (true);
return pos;
}

size_type operator()(size_type i)const {
return select(i);
}

size_type size()const {
return m_v->size();
}

void set_vector(const bit_vector_type* v=nullptr) {
m_v = v;
}

select_0_support_sd& operator=(const select_0_support_sd& ss) {
if (this != &ss) {
m_pointer = ss.m_pointer;
m_rank1 = ss.m_rank1;
set_vector(ss.m_v);
}
return *this;
}

void swap(select_0_support_sd& ss) {
m_pointer.swap(ss.m_pointer);
m_rank1.swap(ss.m_rank1);
}

void load(std::istream& in, const bit_vector_type* v=nullptr) {
m_pointer.load(in);
m_rank1.load(in);
set_vector(v);
}

size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const {
structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
size_type written_bytes = 0;
written_bytes += m_pointer.serialize(out, child, "pointer");
written_bytes += m_rank1.serialize(out, child, "rank1");
structure_tree::add_size(child, written_bytes);
return written_bytes;
}

};



} // end namespace
#endif
1 change: 1 addition & 0 deletions test/SelectSupportTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ typedef Types<select_support_mcl<>,
select_support_rrr<1, 128>,
select_support_sd<1>,
select_support_sd<0>,
select_0_support_sd<>,
select_support_il<1, 256>,
select_support_il<1, 512>,
select_support_il<1, 1024>,
Expand Down

0 comments on commit 44729af

Please sign in to comment.