Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
105 commits
Select commit Hold shift + click to select a range
45985ed
enable write flushing so put actually touches the database
jbellis Oct 23, 2012
2f0cb4d
rename getHTable to initHTable
jbellis Oct 23, 2012
fee3847
remove unnecessary lock and call close during cleanup
jbellis Oct 23, 2012
d3b6223
make fields private
jbellis Oct 23, 2012
61e628c
r/m dead code
jbellis Oct 23, 2012
ac37b03
use Collections.singletonMap instead of single-entry HashMaps
jbellis Oct 23, 2012
285b2ce
switch to four-space indentation
jbellis Oct 23, 2012
94a7d63
make fields private
jbellis Oct 23, 2012
6c51088
r/m cassandra client 8 that had purely cosmetic differences from 7
jbellis Oct 24, 2012
21d15c2
replace printing of just exception messages with full stack trace
jbellis Dec 13, 2012
358d563
fix mongodb result decoding
jbellis Dec 13, 2012
0e67d81
CQL
jbellis Oct 18, 2013
9fc766e
merge from bfc and update to C* 2.0.7
jbellis May 21, 2014
ca89d97
cleanup
jbellis May 21, 2014
7198059
merge thumbtack
jbellis May 22, 2014
1af3ec4
move Fabric stuff into its own subdirectory
jbellis May 22, 2014
ac3316d
move reporting scripts into own subdirectory
jbellis May 22, 2014
d5351c8
r/m dead code
jbellis May 22, 2014
a4a3cbd
r/m unused imports
jbellis May 22, 2014
b77f583
goodbye, hector
jbellis May 22, 2014
afdbb98
r/m synchronized and fix concurrency bugs
jbellis May 22, 2014
24def6d
cleanup
jbellis May 22, 2014
5b95298
couchbase defaults to persisted to disk on at least one replica (same…
jbellis May 22, 2014
b444801
Disabling aerospike for now as the maven dependencies seem to be wrong.
May 22, 2014
31148af
Simplify couchbase and membase configuration
jbellis May 22, 2014
51d8d72
Adds additional cleanup of unused jars.
May 28, 2014
0f641f1
Ignore fabric's .pyc files
May 28, 2014
f9d2f4a
Makes CassandraCQLClient look for a comma separated hosts parameter l…
May 28, 2014
71a7193
switch CB default persistTo/replicateTo to Master/Zero
jbellis Jun 1, 2014
1fbd7d3
clean up mongo options parsing and default to fsync_safe
jbellis Jun 2, 2014
0ba24ea
update to hbase 0.98 and default to fsync_wal
jbellis Jun 2, 2014
25fb656
fix getInteger -> parseInt
jbellis Jun 3, 2014
fa34f7e
add note on durability
jbellis Jun 9, 2014
00b050e
cleanup
jbellis Jun 9, 2014
5dd6138
mention Fabric
jbellis Jun 9, 2014
50833ce
fix NPE
jbellis Jun 10, 2014
9ea6f82
r/m cluster refcounting that didn't actually work
jbellis Jun 30, 2014
16e7e8b
Part1: fix #2 by modifying READ / SCAN / UPDATE signatures
Jul 10, 2014
ab52ff5
r/m couchbase 1.8
jbellis Jul 11, 2014
21534fd
r/m hypertable
jbellis Jul 11, 2014
e3a46da
r/m elasticsearch
jbellis Jul 11, 2014
37d59f2
r/m gemfire
jbellis Jul 11, 2014
dc6e098
r/m mapkeeper
jbellis Jul 11, 2014
2212965
r/m infinispan
jbellis Jul 11, 2014
258714c
r/m membase
jbellis Jul 11, 2014
45dc7fe
r/m OrientDB
jbellis Jul 11, 2014
e82ede1
r/m nosqldb
jbellis Jul 11, 2014
a45217c
r/m redis
jbellis Jul 11, 2014
55bdbac
r/m voldemort
jbellis Jul 11, 2014
7723268
refactor to avoid Vector in DB signature
jbellis Jul 11, 2014
8b948f8
Part2: fixes #2 - Switches from strings to pre-build prepared statem…
Jul 12, 2014
c5e6fec
Part 3: fixes #2 - merge master into preparedstmt
Jul 13, 2014
54af8b1
Part 4: fixes #2 - Update MongoDB client
Jul 14, 2014
a70f534
Part 5: fixes #2 - Update DynamoDB client
Jul 14, 2014
9fddc22
Part 6: fixes #2 - Update JDBC client
Jul 14, 2014
1c9dd11
Part 7: fixes #2 - Update Memcached client
Jul 14, 2014
99fda79
Part 8: fixes #2 - Update Couchbase2 Client (just add back to pom sin…
Jul 14, 2014
96b7d6d
Part 9: fixes #2 - Update Accumulo client
Jul 14, 2014
a294104
Part 10: fixes #2 - Update HBase client
Jul 14, 2014
d2e6af0
Part 11: fixes #2 - Update Aerospike client for Aerospike v3, and mod…
Jul 14, 2014
d06eb09
Part 12: fixes #2 - tidy up formatting
Jul 15, 2014
ce132ce
Part 13: fixes #2 - Set consistancy levels for all prepared statements
Jul 15, 2014
1c8f02b
cleanup
jbellis Jul 16, 2014
3c39a03
Modify CassandraCQLClient#updateOne to use a map of prepared statemen…
Jul 16, 2014
2ec9ec4
Merge pull request #5 from lyubent/psv2
Jul 16, 2014
f6dd051
cleanup
jbellis Jul 16, 2014
c578285
Bump client driver versions
Jul 18, 2014
d478097
Allow classpath overrides, work around maven oddity
Jul 18, 2014
7f58147
avoid round-tripping through String when YCSB native type is bytes
jbellis Jul 30, 2014
7ae7119
simplify
jbellis Jul 30, 2014
8c75182
Merge branch 'master' of github.com:jbellis/YCSB
jbellis Jul 30, 2014
8c16138
fix binding
jbellis Jul 30, 2014
053033f
avoid unnecessary LIMIT
jbellis Jul 31, 2014
d01f17b
Fix key generation, especially for mixed workloads
thobbs Aug 29, 2014
1107926
actually apply the requested hbase durability
jbellis Sep 15, 2014
d24466d
rename away underscores
jbellis Sep 23, 2014
fe57856
import apurtell's hbase work
jbellis Sep 23, 2014
4f3de5b
reformat
jbellis Sep 23, 2014
6e65ecb
apply refactoring and durability changes
jbellis Sep 23, 2014
1b2bb51
be less aggressive about thread counts, we will spin up plenty of cli…
jbellis Sep 24, 2014
fe562c7
dos2unix
jbellis Sep 30, 2014
d39e5e5
format
jbellis Sep 30, 2014
bba3e14
remove dangerously unperformant code
jbellis Sep 30, 2014
e6ed6d9
correct initial generator to the right bounds
jbellis Oct 1, 2014
23b870b
Revert back to 1b2bb51fcd217afd6e7a513de6a4470c5b347ead
jbellis Oct 1, 2014
ea48f58
back to Tyler's incremental zipfian, but fix the bounds so we don't n…
jbellis Oct 1, 2014
6bd4bcb
dos2unix
jbellis Oct 3, 2014
6f9fc73
Don't create a billion objects in ScrambledZipfian. This still prese…
jbellis Oct 3, 2014
eaa3bfc
Only use highest-readable key for dists like "latest"
thobbs Oct 7, 2014
156a40d
cleanup and avoid no-op configuration post-build
jbellis Oct 9, 2014
c11c4bf
cleanup
jbellis Oct 10, 2014
09a3c3d
remove retry loops
jbellis Oct 10, 2014
2e27ca6
rename table -> keyspace
jbellis Oct 10, 2014
0297573
cleanup
jbellis Oct 10, 2014
9b418a6
optimize updateOne by calling insert instead of batch_mutate
jbellis Oct 10, 2014
ad45988
fix bug scanning with readallfields=false
jbellis Oct 13, 2014
507e88c
Add assert for missing rows
thobbs Oct 14, 2014
da62ca9
default to CoreWorkload
jbellis Nov 18, 2014
47ef55f
make CREATE copy-paste-able
jbellis Nov 18, 2014
0fbf097
r/m unused exportMeasurements method
jbellis Nov 18, 2014
ace3ea5
omit histogram buckets with zero count
jbellis Nov 18, 2014
607e808
spread the ops out across threads evenly, but add the remainder after…
jbellis Nov 19, 2014
b4766ac
Merge MongoDB changes from achille/master
thobbs Nov 25, 2014
5088a6a
Make "journaled" the default Mongo write concern
thobbs Nov 25, 2014
217020b
Remove int size limit on recordcount
Jan 6, 2015
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# ignore compiled byte code
target
*.pyc

# ignore output files from testing
output*
Expand All @@ -8,3 +9,7 @@ output*
.project
.classpath
.settings

# and intellij
.idea/
*.iml
16 changes: 15 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
- gh-95 Bump MongoDB version to 2.9.0 (allanbank)
- Upgraded MongoDB client to v.2.10.1 (thumbtack)
- Added mongodb.readPreference property (thumbtack)
- Added Aerospike client (thumbtack)
- Throttling improvement: the throughput of last 100ms is used (thumbtack)
- Output improvements, exportmeasurementsinterval property (thumbtack)
- Added warm-up phase before running the workload (thumbtack)
- Added fieldnameprefix property (thumbtack)
- Added retries: readretrycount, updateretrycount, insertretrycount, retrydelay
properties (thumbtack)
- Added ignoreinserterrors property (thumbtack)
- Upgraded Couchbase client to v.1.1.0 and Spymemcached client to v.2.8.9
(thumbtack)
- Added couchbase.replicateTo property (thumbtack)
- Added Fabric scripts to run YCSB on multiple hosts (thumbtack)

- gh-67 Use checkstyle (m1ch1)
- gh-76 Implemented OrientDB client (lvca)
- gh-88 YCSB client for Amazon DynamoDB (jananin)
Expand Down
95 changes: 95 additions & 0 deletions IMPROVEMENTS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
Here is the list of YCSB improvements made by Thumbtack Technology.

Upgrade of MongoDB client

We upgraded the MongoDB driver from version 2.8.0 (released before
Mongo 2.2) to version 2.10.1 and now allow the readPreference
(http://docs.mongodb.org/manual/applications/replication/#read-preference)
to be set as a configuration property.
Also, all write errors are now printed to stderr.

New configuration properties
mongodb.readPreference = primary|primaryPreferred|secondary|secondaryPreferred

Improvements of Aerospike client

We added the ability to display operation result codes in more detail:
not only success or failure, but also the actual error type.

Throttling improvements

YCSB allows limiting the throughput, but it uses the average throughput over
the whole duration of the experiment. This causes throughput peaks after
drops in failover tests. We modified YCSB to keep the desired throughput
at the same level, without peaks, by throttling based on the average throughput
over the last 100 ms.

Output improvements

Print current statistics to stderr every 2 seconds instead of every 10 seconds
Print intermediate statistics (identical to the final ones) to stdout at every
configured time interval, to avoid losing stats if YCSB hangs or crashes
Print final statistics on YCSB process shutdown

New configuration properties
exportmeasurementsinterval: interval, in milliseconds, for exporting measurements
to the output stream (default: 1000)

Warm-up

Allows running some number of operations before gathering statistics, to warm up
the database. The length of the warm-up can be limited by the number of operations
or by a time period.

New configuration properties
warmupoperationcount: number of operations in warmup phase,
if zero then don't warmup (default: 0)
warmupexecutiontime: execution time of warmup phase in milliseconds,
if zero then don't warmup (default: 0)

Field name

By default, YCSB names the database record fields as “field” + a number.
The new configuration option allows replacing the “field” prefix with
something shorter, which affects the data storage size for schema-less databases.

New configuration properties
fieldnameprefix: string prefix for the field name (default: “field”)

Retries

Added the ability to retry failed operations. The max number of retries is limited.
The total number of retries is counted. The retries are done within the same
operation, so they don’t affect the number of operations, but they increase
the latency of the operation. The retries can be done with delays between them.
The original YCSB stops on an insert error; now it’s possible to retry the insert.

New configuration properties
readretrycount: number of retries if read fails,
if zero then don't retry (default: 0)
updateretrycount: number of retries if update fails,
if zero then don't retry (default: 0)
insertretrycount: number of retries if insert fails,
if zero then don't retry (default: 0)
retrydelay: delay between retries in milliseconds (default: 0)

Inserts with errors

A new configuration option was added to allow errors on inserts.
Usually YCSB stops when any operation fails during the load phase. But now it’s possible
to ignore such errors and continue inserting. This can be useful for MongoDB,
for example, to rerun the load phase without clearing the data.

New configuration properties
ignoreinserterrors: set to true to activate the new feature

Upgrade of Couchbase client

We upgraded the Couchbase driver from version 1.1-dp2 to 1.1.0 and the Spymemcached
driver from version 2.8.4 to 2.8.9, and now allow the replicateTo option
(http://www.couchbase.com/autodocs/couchbase-java-client-1.1.0/com/couchbase/client/CouchbaseClient.html#replace(java.lang.String, int, java.lang.String, net.spy.memcached.PersistTo, net.spy.memcached.ReplicateTo))
to be set as a configuration property.

New configuration properties
couchbase.replicateTo = ZERO|ONE|TWO|THREE

22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
Yahoo! Cloud System Benchmark (YCSB)
====================================
[![Build Status](https://travis-ci.org/brianfrankcooper/YCSB.png?branch=master)](https://travis-ci.org/brianfrankcooper/YCSB)

A note on comparing multiple systems
------------------------------------

NoSQL systems have widely varying defaults for trading off write durability vs performance. Make sure that you are [comparing apples to apples across all candidates](http://www.datastax.com/dev/blog/how-not-to-benchmark-cassandra-a-case-study). The most useful common denominator is synchronously durable writes. The following YCSB clients have been verified to perform synchronously durable writes by default:

- Couchbase
- HBase
- MongoDB

Cassandra requires a configuration change in conf/cassandra.yaml. Uncomment these lines:

# commitlog_sync: batch
# commitlog_sync_batch_window_in_ms: 50

Links
-----
http://wiki.github.com/brianfrankcooper/YCSB/
http://wiki.github.com/jbellis/YCSB/
http://research.yahoo.com/Web_Information_Management/YCSB/
ycsb-users@yahoogroups.com

Expand All @@ -14,7 +27,7 @@ Getting Started
1. Download the latest release of YCSB:

```sh
wget https://github.com/downloads/brianfrankcooper/YCSB/ycsb-0.1.4.tar.gz
wget https://github.com/downloads/jbellis/YCSB/ycsb-0.1.4.tar.gz
tar xfvz ycsb-0.1.4.tar.gz
cd ycsb-0.1.4
```
Expand All @@ -36,3 +49,6 @@ Getting Started

See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties for
the list of available workload properties.

Alternatively, see fabric/README for Thumbtack's work on parallelizing
YCSB clients using Fabric.
3 changes: 3 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
__author__ = 'nick'
# this file is needed to make relative imports possible in Python
# specifically to access ./conf/* from ./fabfile/*
94 changes: 60 additions & 34 deletions accumulo/src/main/java/com/yahoo/ycsb/db/AccumuloClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.AccumuloException;
Expand Down Expand Up @@ -171,8 +171,16 @@ private Scanner getRow(Text row, Set<String> fields)
}

@Override
public int read(String table, String key, Set<String> fields,
HashMap<String, ByteIterator> result) {
public int readOne(String table, String key, String field, Map<String,ByteIterator> result) {
return read(table, key, result);
}

@Override
public int readAll(String table, String key, Map<String,ByteIterator> result) {
return read(table, key, result);
}

public int read(String table, String key, Map<String, ByteIterator> result) {

try {
checkTable(table);
Expand All @@ -198,8 +206,26 @@ public int read(String table, String key, Set<String> fields,
}

@Override
public int scanOne(String table, String startkey, int recordcount, String field, List<Map<String, ByteIterator>> result) {

_scanScanner.clearColumns();
_scanScanner.setRange(new Range(new Text(startkey), null));
_scanScanner.fetchColumn(_colFam, new Text(field));

return scan(table, startkey, recordcount, result);
}

@Override
public int scanAll(String table, String startkey, int recordcount, List<Map<String, ByteIterator>> result) {

_scanScanner.clearColumns();
_scanScanner.setRange(new Range(new Text(startkey), null));

return scan(table, startkey, recordcount, result);
}

public int scan(String table, String startkey, int recordcount,
Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
List<Map<String, ByteIterator>> result) {
try {
checkTable(table);
} catch (TableNotFoundException e) {
Expand All @@ -214,19 +240,11 @@ public int scan(String table, String startkey, int recordcount,
_scanScanner.setRange(new Range(new Text(startkey), null));

// Batch size is how many key/values to try to get per call. Here, I'm
// guessing that the number of keys in a row is equal to the number of fields
// guessing that the number of keys in a row is equal to the number of fields
// we're interested in.
// We try to fetch one more so as to tell when we've run out of fields.
// We try to fetch one or more using either ScanOne or scanAll respectively so
// as to tell when we've run out of fields.

if (fields != null) {
// And add each of them as fields we want.
for(String field:fields)
{
_scanScanner.fetchColumn(_colFam, new Text(field));
}
} else {
// If no fields are provided, we assume one column/row.
}

String rowKey = "";
HashMap<String, ByteIterator> currentHM = null;
Expand All @@ -240,15 +258,8 @@ public int scan(String table, String startkey, int recordcount,
break;
}
rowKey = entry.getKey().getRow().toString();
if (fields != null) {
// Initial Capacity for all keys.
currentHM = new HashMap<String, ByteIterator>(fields.size());
}
else
{
// An empty result map.
currentHM = new HashMap<String, ByteIterator>();
}
currentHM = new HashMap<String, ByteIterator>();

result.add(currentHM);
}
// Now add the key to the hashmap.
Expand All @@ -262,13 +273,17 @@ public int scan(String table, String startkey, int recordcount,
}

@Override
public int update(String table, String key, HashMap<String, ByteIterator> values) {
try {
checkTable(table);
} catch (TableNotFoundException e) {
System.err.println("Error trying to connect to Accumulo table." + e);
return ServerError;
}
public int updateOne(String table, String key, String field, ByteIterator value) {

Mutation mutInsert = new Mutation(new Text(key));
mutInsert.put(_colFam, new Text(field), System.currentTimeMillis(),
new Value(value.toArray()));

return update(table, key, mutInsert);
}

@Override
public int updateAll(String table, String key, Map<String,ByteIterator> values) {

Mutation mutInsert = new Mutation(new Text(key));
for (Map.Entry<String, ByteIterator> entry : values.entrySet()) {
Expand All @@ -277,6 +292,17 @@ public int update(String table, String key, HashMap<String, ByteIterator> values
new Value(entry.getValue().toArray()));
}

return update(table, key, mutInsert);
}

public int update(String table, String key, /*Map<String, ByteIterator> values*/Mutation mutInsert) {
try {
checkTable(table);
} catch (TableNotFoundException e) {
System.err.println("Error trying to connect to Accumulo table." + e);
return ServerError;
}

try {
_bw.addMutation(mutInsert);
// Distributed YCSB co-ordination: YCSB on a client produces the key to
Expand All @@ -296,8 +322,8 @@ public int update(String table, String key, HashMap<String, ByteIterator> values
}

@Override
public int insert(String table, String key, HashMap<String, ByteIterator> values) {
return update(table, key, values);
public int insert(String table, String key, Map<String, ByteIterator> values) {
return updateAll(table, key, values);
}

@Override
Expand Down Expand Up @@ -381,7 +407,7 @@ private void keyNotification(String key) {
fields.add("field"+j);
HashMap<String,ByteIterator> result = new HashMap<String,ByteIterator>();

int retval = read(table, strKey, fields, result);
int retval = read(table, strKey, result);
//If the results are empty, the key is enqueued in Zookeeper
//and tried again, until the results are found.
if (result.size() == 0) {
Expand Down
Loading