Skip to content

Commit b5301cb

Browse files
committed
updated readme
1 parent 40e9582 commit b5301cb

File tree

5 files changed

+129
-147
lines changed

5 files changed

+129
-147
lines changed

README.md

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,79 @@
1+
# RocksDB Equipped With SuRF
2+
3+
## Install Dependencies
4+
sudo apt-get install build-essential cmake libsnappy
5+
cd /usr/src/gtest
6+
sudo cmake CMakeLists.txt
7+
sudo make
8+
sudo cp *.a /usr/lib
9+
10+
## Build
11+
git submodule init
12+
git submodule update
13+
mkdir build
14+
cd build
15+
cmake -DWITH_SNAPPY=ON ..
16+
make -j 8
17+
18+
## Generate Workload and RocksDB Instances
19+
The experiments presented in our
20+
[SIGMOD paper](http://www.cs.cmu.edu/~huanche1/publications/surf_paper.pdf)
21+
use 100GB datasets. Because they take a long time to run, we scale the experiments
22+
down to 2GB datasets (the 100GB experiment configs are still included in
23+
filter_experiment/filter_experiment.cc, commented out).
24+
25+
cd filter_experiment
26+
python poisson.py
27+
mkdir data_no_filter data_bloom data_surf
28+
29+
Running the executable "../build/filter_experiment/filter_experiment" without arguments will show the usage information:
30+
Usage:
31+
arg 1: path to datafiles
32+
arg 2: filter type
33+
0: no filter
34+
1: Bloom filter
35+
2: SuRF
36+
3: SuRF Hash
37+
4: SuRF Real
38+
arg 3: compression?
39+
0: no compression
40+
1: Snappy
41+
arg 4: use direct I/O?
42+
0: no
43+
1: yes
44+
arg 5: query type
45+
0: init
46+
1: point query
47+
2: open range query
48+
3: closed range query
49+
arg 6: range size
50+
arg 7: warmup # of queries
51+
52+
To initialize the RocksDB instances with no filter, Bloom filters, and SuRF (with real suffixes):
53+
54+
../build/filter_experiment/filter_experiment data_no_filter 0 1 0 0 0 0
55+
../build/filter_experiment/filter_experiment data_bloom 1 1 0 0 0 0
56+
../build/filter_experiment/filter_experiment data_surf 4 1 0 0 0 0
57+
58+
## Run Benchmark
59+
You may want to clear the system cache (echo 3 | sudo tee /proc/sys/vm/drop_caches)
60+
before running each experiment. The source file for the experiments is
61+
"filter_experiment/filter_experiment.cc". To get correct I/O counts, you need
62+
to specify the device in the functions "getIOCount()" and "printIO()".
63+
filter_experiment.cc includes more configurations (e.g., specifying filter sizes)
64+
to run different experiments besides the following examples:
65+
66+
// point queries
67+
../build/filter_experiment/filter_experiment data_no_filter 0 1 1 1 0 0
68+
../build/filter_experiment/filter_experiment data_bloom 1 1 1 1 0 0
69+
../build/filter_experiment/filter_experiment data_surf 4 1 1 1 0 0
70+
71+
// closed-range queries (50% queries return empty results)
72+
../build/filter_experiment/filter_experiment data_no_filter 0 1 1 3 69310 0
73+
../build/filter_experiment/filter_experiment data_bloom 1 1 1 3 69310 0
74+
../build/filter_experiment/filter_experiment data_surf 4 1 1 3 69310 0
75+
76+
177
## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage
278

379
[![Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb)

filter_experiment/filter_experiment.cc

Lines changed: 35 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,14 @@ void init(const std::string& key_path, const std::string& db_path, rocksdb::DB**
8383
options->max_open_files = -1; // pre-load indexes and filters
8484

8585
// 2GB config
86-
//options->write_buffer_size = 2 * 1048576;
87-
//options->max_bytes_for_level_base = 10 * 1048576;
88-
//options->target_file_size_base = 2 * 1048576;
86+
options->write_buffer_size = 2 * 1048576;
87+
options->max_bytes_for_level_base = 10 * 1048576;
88+
options->target_file_size_base = 2 * 1048576;
8989

9090
// 100GB config
91-
options->write_buffer_size = 64 * 1048576;
92-
options->max_bytes_for_level_base = 256 * 1048576;
93-
options->target_file_size_base = 64 * 1048576;
91+
//options->write_buffer_size = 64 * 1048576;
92+
//options->max_bytes_for_level_base = 256 * 1048576;
93+
//options->target_file_size_base = 64 * 1048576;
9494

9595
if (use_direct_io > 0)
9696
options->use_direct_reads = true;
@@ -192,42 +192,7 @@ void testScan(const std::string& key_path, rocksdb::DB* db, uint64_t key_count)
192192
std::cout << "elapsed: " << (static_cast<double>(elapsed) / 1000000000.) << "\n";
193193
std::cout << "throughput: " << (static_cast<double>(key_count) / (static_cast<double>(elapsed) / 1000000000.)) << "\n";
194194
}
195-
/*
196-
void warmup(rocksdb::DB* db, uint64_t key_range, uint64_t query_count) {
197-
struct timespec ts_start;
198-
struct timespec ts_end;
199-
uint64_t elapsed;
200-
201-
std::cout << "warming up\n";
202-
clock_gettime(CLOCK_MONOTONIC, &ts_start);
203-
204-
for (uint64_t i = 0; i < query_count; i++) {
205-
uint64_t key = key_range / query_count * i + 1;
206-
key = htobe64(key);
207-
208-
rocksdb::Slice s_key(reinterpret_cast<const char*>(&key), sizeof(key));
209-
std::string s_value;
210-
uint64_t value;
211-
212-
rocksdb::Status status = db->Get(rocksdb::ReadOptions(), s_key, &s_value);
213-
214-
if (status.ok()) {
215-
assert(s_value.size() >= sizeof(uint64_t));
216-
value = *reinterpret_cast<const uint64_t*>(s_value.data());
217-
(void)value;
218-
}
219-
}
220-
221-
clock_gettime(CLOCK_MONOTONIC, &ts_end);
222-
elapsed = static_cast<uint64_t>(ts_end.tv_sec) * 1000000000UL +
223-
static_cast<uint64_t>(ts_end.tv_nsec) -
224-
static_cast<uint64_t>(ts_start.tv_sec) * 1000000000UL +
225-
static_cast<uint64_t>(ts_start.tv_nsec);
226195

227-
std::cout << "elapsed: " << (static_cast<double>(elapsed) / 1000000000.) << "\n";
228-
std::cout << "throughput: " << (static_cast<double>(query_count) / (static_cast<double>(elapsed) / 1000000000.)) << "\n";
229-
}
230-
*/
231196
void warmup(const std::string key_path, uint64_t key_count, uint64_t sample_gap, rocksdb::DB* db) {
232197
std::ifstream keyFile(key_path);
233198
std::vector<uint64_t> keys;
@@ -360,17 +325,6 @@ void benchOpenRangeQuery(rocksdb::DB* db, rocksdb::Options* options, uint64_t ke
360325
uint64_t j = 0;
361326
for (it->Seek(s_key); it->Valid() && j < scan_length; it->Next(), j++) {
362327
uint64_t found_key = *reinterpret_cast<const uint64_t*>(it->key().data());
363-
364-
//std::cout << std::hex << found_key << std::dec << "========================================\n";
365-
366-
//if (i < 20)
367-
//std::cout << std::hex << found_key << std::dec << "\n";
368-
/*
369-
for (int k = 0; k < 8; k++)
370-
std::cout << std::hex << (uint16_t)it->key().data()[k] << " ";
371-
std::cout << std::dec << "\n";
372-
*/
373-
374328
assert(it->value().size() >= sizeof(uint64_t));
375329
value = *reinterpret_cast<const uint64_t*>(it->value().data());
376330
(void)value;
@@ -461,70 +415,8 @@ void benchClosedRangeQuery(rocksdb::DB* db, rocksdb::Options* options, uint64_t
461415
std::cout << latencies;
462416
}
463417

464-
/*
465-
void benchClosedRangeQuery(rocksdb::DB* db, uint64_t key_count, uint64_t key_gap,
466-
uint64_t query_count, uint64_t range_size) {
467-
std::random_device rd;
468-
std::mt19937_64 e(rd());
469-
std::uniform_int_distribution<unsigned long long> dist(0, (key_count * key_gap));
470-
471-
std::vector<uint64_t> query_keys;
472-
473-
for (uint64_t i = 0; i < query_count; i++) {
474-
uint64_t r = dist(e);
475-
query_keys.push_back(r);
476-
}
477-
478-
struct timespec ts_start;
479-
struct timespec ts_end;
480-
uint64_t elapsed;
481-
482-
printf("closed range query\n");
483-
rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions());
484-
485-
clock_gettime(CLOCK_MONOTONIC, &ts_start);
486-
487-
uint64_t count = 0;
488-
489-
for (uint64_t i = 0; i < query_count; i++) {
490-
uint64_t key = query_keys[i];
491-
key = htobe64(key);
492-
rocksdb::Slice s_key(reinterpret_cast<const char*>(&key), sizeof(key));
493-
494-
uint64_t until_key = query_keys[i] + 1000000;
495-
until_key = htobe64(until_key);
496-
rocksdb::Slice s_until_key(reinterpret_cast<const char*>(&until_key), sizeof(until_key));
497-
bool inclusive = false;
498-
499-
std::string s_value;
500-
uint64_t value;
501-
502-
uint64_t j = 0;
503-
for (it->SeekUntil(s_key, s_until_key, inclusive); it->Valid(); it->Next(), j++) {
504-
uint64_t found_key = *reinterpret_cast<const uint64_t*>(it->key().data());
505-
if (be64toh(found_key) >= be64toh(until_key))
506-
break;
507-
count++;
508-
}
509-
}
510-
511-
std::cout << "count per op = " << ((count + 0.0) / query_count) << "\n";
512-
513-
clock_gettime(CLOCK_MONOTONIC, &ts_end);
514-
elapsed = static_cast<uint64_t>(ts_end.tv_sec) * 1000000000UL +
515-
static_cast<uint64_t>(ts_end.tv_nsec) -
516-
static_cast<uint64_t>(ts_start.tv_sec) * 1000000000UL +
517-
static_cast<uint64_t>(ts_start.tv_nsec);
518-
519-
std::cout << "elapsed: " << (static_cast<double>(elapsed) / 1000000000.) << "\n";
520-
std::cout << "throughput: " << (static_cast<double>(query_count) / (static_cast<double>(elapsed) / 1000000000.)) << "\n";
521-
522-
delete it;
523-
}
524-
*/
525-
526418
void printIO() {
527-
FILE* fp = fopen("/sys/block/sda/sda2/stat", "r");
419+
FILE* fp = fopen("/sys/block/sda/sda1/stat", "r");
528420
if (fp == NULL) {
529421
printf("Error: empty fp\n");
530422
printf("%s\n", strerror(errno));
@@ -538,7 +430,7 @@ void printIO() {
538430
}
539431

540432
uint64_t getIOCount() {
541-
std::ifstream io_file(std::string("/sys/block/sda/sda2/stat"));
433+
std::ifstream io_file(std::string("/sys/block/sda/sda1/stat"));
542434
uint64_t io_count = 0;
543435
io_file >> io_count;
544436
return io_count;
@@ -572,18 +464,19 @@ int main(int argc, const char* argv[]) {
572464
std::cout << "\t0: no filter\n";
573465
std::cout << "\t1: Bloom filter\n";
574466
std::cout << "\t2: SuRF\n";
575-
std::cout << "\t2: SuRF Hash\n";
576-
std::cout << "\t2: SuRF Real\n";
467+
std::cout << "\t3: SuRF Hash\n";
468+
std::cout << "\t4: SuRF Real\n";
577469
std::cout << "arg 3: compression?\n";
578470
std::cout << "\t0: no compression\n";
579471
std::cout << "\t1: Snappy\n";
580472
std::cout << "arg 4: use direct I/O?\n";
581473
std::cout << "\t0: no\n";
582474
std::cout << "\t1: yes\n";
583475
std::cout << "arg 5: query type\n";
584-
std::cout << "\t0: point query\n";
585-
std::cout << "\t1: open range query\n";
586-
std::cout << "\t2: closed range query\n";
476+
std::cout << "\t0: init\n";
477+
std::cout << "\t1: point query\n";
478+
std::cout << "\t2: open range query\n";
479+
std::cout << "\t3: closed range query\n";
587480
std::cout << "arg 6: range size\n";
588481
std::cout << "arg 7: warmup # of queries\n";
589482
return -1;
@@ -597,20 +490,19 @@ int main(int argc, const char* argv[]) {
597490
uint64_t range_size = (uint64_t)atoi(argv[6]);
598491
uint64_t warmup_query_count = (uint64_t)atoi(argv[7]);
599492
uint64_t scan_length = 1;
600-
//uint64_t range_size = 5000000;
601493

602-
const std::string kKeyPath = "/home/huanchen/rocksdb/filter_experiment/poisson_timestamps.csv";
494+
const std::string kKeyPath = "poisson_timestamps.csv";
603495
const uint64_t kValueSize = 1000;
604496
const uint64_t kKeyRange = 10000000000000;
605497
const uint64_t kQueryCount = 50000;
606498

607499
// 2GB config
608-
//const uint64_t kKeyCount = 2000000;
609-
//const uint64_t kWarmupSampleGap = 100;
500+
const uint64_t kKeyCount = 2000000;
501+
const uint64_t kWarmupSampleGap = 100;
610502

611503
// 100GB config
612-
const uint64_t kKeyCount = 100000000;
613-
const uint64_t kWarmupSampleGap = kKeyCount / warmup_query_count;
504+
//const uint64_t kKeyCount = 100000000;
505+
//const uint64_t kWarmupSampleGap = kKeyCount / warmup_query_count;
614506

615507
//=========================================================================
616508

@@ -620,6 +512,9 @@ int main(int argc, const char* argv[]) {
620512

621513
init(kKeyPath, db_path, &db, &options, &table_options, use_direct_io, kKeyCount, kValueSize, filter_type, compression_type);
622514

515+
if (query_type == 0)
516+
return 0;
517+
623518
//=========================================================================
624519

625520
//testScan(db, kKeyCount);
@@ -635,34 +530,34 @@ int main(int argc, const char* argv[]) {
635530
//std::cout << options.statistics->ToString() << "\n";
636531
//printIO();
637532

638-
uint64_t mem_free_after = getMemFree();
639-
uint64_t mem_available_after = getMemAvailable();
640-
std::cout << "Mem Free diff: " << (mem_free_before - mem_free_after) << "\n";
641-
std::cout << "Mem Aavilable diff: " << (mem_available_before - mem_available_after) << "\n";
533+
//uint64_t mem_free_after = getMemFree();
534+
//uint64_t mem_available_after = getMemAvailable();
535+
//std::cout << "Mem Free diff: " << (mem_free_before - mem_free_after) << "\n";
536+
//std::cout << "Mem Aavilable diff: " << (mem_available_before - mem_available_after) << "\n";
642537

643538
uint64_t io_before = getIOCount();
644-
mem_free_before = getMemFree();
645-
mem_available_before = getMemAvailable();
539+
//mem_free_before = getMemFree();
540+
//mem_available_before = getMemAvailable();
646541

647-
if (query_type == 0)
542+
if (query_type == 1)
648543
benchPointQuery(db, &options, kKeyRange, kQueryCount);
649-
else if (query_type == 1)
650-
benchOpenRangeQuery(db, &options, kKeyRange, kQueryCount, scan_length);
651544
else if (query_type == 2)
545+
benchOpenRangeQuery(db, &options, kKeyRange, kQueryCount, scan_length);
546+
else if (query_type == 3)
652547
benchClosedRangeQuery(db, &options, kKeyRange, kQueryCount, range_size);
653548

654549
uint64_t io_after = getIOCount();
655-
mem_free_after = getMemFree();
656-
mem_available_after = getMemAvailable();
550+
//mem_free_after = getMemFree();
551+
//mem_available_after = getMemAvailable();
657552
//std::cout << options.statistics->ToString() << "\n";
658553
//std::string stats;
659554
//db->GetProperty(rocksdb::Slice("rocksdb.stats"), &stats);
660555
//std::cout << stats << "\n";
661556
//printIO();
662557

663558
std::cout << "I/O count: " << (io_after - io_before) << "\n";
664-
std::cout << "Mem Free diff: " << (mem_free_before - mem_free_after) << "\n";
665-
std::cout << "Mem Aavilable diff: " << (mem_available_before - mem_available_after) << "\n";
559+
//std::cout << "Mem Free diff: " << (mem_free_before - mem_free_after) << "\n";
560+
//std::cout << "Mem Aavilable diff: " << (mem_available_before - mem_available_after) << "\n";
666561

667562
close(db);
668563

table/block_based_table_reader.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,8 @@ class BlockBasedTable : public TableReader {
205205

206206
friend class PartitionIndexReader;
207207

208-
protected:
208+
//protected:
209+
public:
209210
template <class TValue>
210211
struct CachableEntry;
211212
struct Rep;

third-party/SuRF

Submodule SuRF updated 1 file

util/surf.cc

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,13 @@ class FullSuRFBitsBuilder : public FilterBitsBuilder {
3535
}
3636

3737
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
38-
surf::SuRF* filter = new surf::SuRF(keys_, include_dense_, sparse_dense_ratio_,
39-
suffix_type_, suffix_len_);
38+
surf::SuRF* filter;
39+
if (suffix_type_ == surf::SuffixType::kHash)
40+
filter = new surf::SuRF(keys_, include_dense_, sparse_dense_ratio_,
41+
suffix_type_, suffix_len_, 0);
42+
else
43+
filter = new surf::SuRF(keys_, include_dense_, sparse_dense_ratio_,
44+
suffix_type_, 0, suffix_len_);
4045
uint64_t size = filter->serializedSize();
4146
char* data = filter->serialize();
4247
filter->destroy();
@@ -121,9 +126,14 @@ class SuRFPolicy : public FilterPolicy {
121126
std::vector<std::string> keys_str;
122127
for (size_t i = 0; i < (size_t)n; i++)
123128
keys_str.push_back(std::string(keys[i].data(), keys[i].size()));
124-
125-
surf::SuRF* filter = new surf::SuRF(keys_str, include_dense_, sparse_dense_ratio_,
126-
suffix_type_, suffix_len_);
129+
130+
surf::SuRF* filter;
131+
if (suffix_type_ == surf::SuffixType::kHash)
132+
filter = new surf::SuRF(keys_str, include_dense_, sparse_dense_ratio_,
133+
suffix_type_, suffix_len_, 0);
134+
else
135+
filter = new surf::SuRF(keys_str, include_dense_, sparse_dense_ratio_,
136+
suffix_type_, 0, suffix_len_);
127137
uint64_t size = filter->serializedSize();
128138
char* data = filter->serialize();
129139
dst->append(data, size);

0 commit comments

Comments
 (0)