Added percentile of partition sizes and sstables
Cameron Zemek committed Aug 8, 2018
1 parent 4c8c0ca commit a697824
Showing 5 changed files with 246 additions and 42 deletions.
36 changes: 32 additions & 4 deletions README.md
@@ -85,10 +85,24 @@ Summary: Summary statistics about partitions
| Total (SSTable) | Number of sstables on this node |
| Minimum (Size) | Minimum uncompressed partition size |
| Minimum (SSTable) | Minimum number of sstables a partition belongs to |
-| Maximum (Size) | Maximum uncompressed partition size |
-| Maximum (SSTable) | Maximum number of sstables a partition belongs to |
| Average (Size) | Average (mean) uncompressed partition size |
| Average (SSTable) | Average (mean) number of sstables a partition belongs to |
+| std dev. (Size) | Standard deviation of partition sizes |
+| std dev. (SSTable) | Standard deviation of number of sstables for a partition |
+| 50% (Size) | Estimated 50th percentile of partition sizes |
+| 50% (SSTable) | Estimated 50th percentile of sstables for a partition |
+| 75% (Size) | Estimated 75th percentile of partition sizes |
+| 75% (SSTable) | Estimated 75th percentile of sstables for a partition |
+| 90% (Size) | Estimated 90th percentile of partition sizes |
+| 90% (SSTable) | Estimated 90th percentile of sstables for a partition |
+| 95% (Size) | Estimated 95th percentile of partition sizes |
+| 95% (SSTable) | Estimated 95th percentile of sstables for a partition |
+| 99% (Size) | Estimated 99th percentile of partition sizes |
+| 99% (SSTable) | Estimated 99th percentile of sstables for a partition |
+| 99.9% (Size) | Estimated 99.9th percentile of partition sizes |
+| 99.9% (SSTable) | Estimated 99.9th percentile of sstables for a partition |
+| Maximum (Size) | Maximum uncompressed partition size |
+| Maximum (SSTable) | Maximum number of sstables a partition belongs to |

Largest partitions: The top N largest partitions

@@ -145,10 +159,24 @@ Summary: Summary statistics about partitions
| Total (SSTable) | Number of sstables on this node |
| Minimum (Size) | Minimum uncompressed partition size |
| Minimum (SSTable) | Minimum number of sstables a partition belongs to |
-| Maximum (Size) | Maximum uncompressed partition size |
-| Maximum (SSTable) | Maximum number of sstables a partition belongs to |
| Average (Size) | Average (mean) uncompressed partition size |
| Average (SSTable) | Average (mean) number of sstables a partition belongs to |
+| std dev. (Size) | Standard deviation of partition sizes |
+| std dev. (SSTable) | Standard deviation of number of sstables for a partition |
+| 50% (Size) | Estimated 50th percentile of partition sizes |
+| 50% (SSTable) | Estimated 50th percentile of sstables for a partition |
+| 75% (Size) | Estimated 75th percentile of partition sizes |
+| 75% (SSTable) | Estimated 75th percentile of sstables for a partition |
+| 90% (Size) | Estimated 90th percentile of partition sizes |
+| 90% (SSTable) | Estimated 90th percentile of sstables for a partition |
+| 95% (Size) | Estimated 95th percentile of partition sizes |
+| 95% (SSTable) | Estimated 95th percentile of sstables for a partition |
+| 99% (Size) | Estimated 99th percentile of partition sizes |
+| 99% (SSTable) | Estimated 99th percentile of sstables for a partition |
+| 99.9% (Size) | Estimated 99.9th percentile of partition sizes |
+| 99.9% (SSTable) | Estimated 99.9th percentile of sstables for a partition |
+| Maximum (Size) | Maximum uncompressed partition size |
+| Maximum (SSTable) | Maximum number of sstables a partition belongs to |

Largest partitions: Partitions with largest uncompressed size

@@ -116,15 +116,11 @@ public static void main(String[] args) {
            System.exit(0);
        }

-       long minPartitionSize = Long.MAX_VALUE;
-       long maxPartitionSize = 0;
+       Histogram sizeHistogram = new Histogram();
+       Histogram sstableHistogram = new Histogram();
        long partitionCount = 0;
        long rowCount = 0;
        long rowDeleteCount = 0;
-       long totalPartitionSize = 0;
-       int minTables = Integer.MAX_VALUE;
-       int maxTables = 0;
-       long totalTables = 0;

        MinMaxPriorityQueue<PartitionStatistics> largestPartitions = MinMaxPriorityQueue
                .orderedBy(PartitionStatistics.SIZE_COMPARATOR)
@@ -157,7 +153,6 @@ public static void main(String[] args) {
        progressBar.updateProgress(0.0);
        while ((pStats = partitionReader.read()) != null) {
            progressBar.updateProgress(partitionReader.getProgress());
-           long partitionSize = pStats.size;
            widestPartitions.add(pStats);
            largestPartitions.add(pStats);
            if (pStats.tombstoneCount > 0) {
@@ -167,16 +162,14 @@ public static void main(String[] args) {
                mostDeletedRows.add(pStats);
            }
            tableCountLeaders.add(pStats);
-           minPartitionSize = Math.min(minPartitionSize, partitionSize);
-           maxPartitionSize = Math.max(maxPartitionSize, partitionSize);
-           totalPartitionSize += partitionSize;
-           minTables = Math.min(minTables, pStats.tableCount);
-           maxTables = Math.max(maxTables, pStats.tableCount);
-           totalTables += pStats.tableCount;
+           sizeHistogram.update(pStats.size);
+           sstableHistogram.update(pStats.tableCount);
            rowCount += pStats.rowCount;
            rowDeleteCount += pStats.rowDeleteCount;
            partitionCount++;
        }
+       sizeHistogram.snapshot();
+       sstableHistogram.snapshot();

        cfProxy.close();

@@ -186,10 +179,17 @@ public static void main(String[] args) {
        tb.addRow("Count", Long.toString(partitionCount), "");
        tb.addRow("Rows", Long.toString(rowCount), "");
        tb.addRow("(deleted)", Long.toString(rowDeleteCount), "");
-       tb.addRow("Total", Util.humanReadableByteCount(totalPartitionSize), Integer.toString(sstableReaders.size()));
-       tb.addRow("Minimum", Util.humanReadableByteCount(minPartitionSize), Integer.toString(minTables));
-       tb.addRow("Maximum", Util.humanReadableByteCount(maxPartitionSize), Integer.toString(maxTables));
-       tb.addRow("Average", Util.humanReadableByteCount(totalPartitionSize / partitionCount), String.format("%.1f", totalTables / (double) partitionCount));
+       tb.addRow("Total", Util.humanReadableByteCount(sizeHistogram.getTotal()), Integer.toString(sstableReaders.size()));
+       tb.addRow("Minimum", Util.humanReadableByteCount(sizeHistogram.getMin()), Long.toString(sstableHistogram.getMin()));
+       tb.addRow("Average", Util.humanReadableByteCount(Math.round(sizeHistogram.getMean())), String.format("%.1f", sstableHistogram.getMean()));
+       tb.addRow("std dev.", Util.humanReadableByteCount(Math.round(sizeHistogram.getStdDev())), String.format("%.1f", sstableHistogram.getStdDev()));
+       tb.addRow("50%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.5))), String.format("%.1f", sstableHistogram.getValue(0.5)));
+       tb.addRow("75%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.75))), String.format("%.1f", sstableHistogram.getValue(0.75)));
+       tb.addRow("90%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.9))), String.format("%.1f", sstableHistogram.getValue(0.9)));
+       tb.addRow("95%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.95))), String.format("%.1f", sstableHistogram.getValue(0.95)));
+       tb.addRow("99%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.99))), String.format("%.1f", sstableHistogram.getValue(0.99)));
+       tb.addRow("99.9%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.999))), String.format("%.1f", sstableHistogram.getValue(0.999)));
+       tb.addRow("Maximum", Util.humanReadableByteCount(sizeHistogram.getMax()), Long.toString(sstableHistogram.getMax()));
        System.out.println(tb);

        System.out.println("Largest partitions:");
180 changes: 180 additions & 0 deletions src/com/instaclustr/sstabletools/Histogram.java
@@ -0,0 +1,180 @@
package com.instaclustr.sstabletools;

import java.util.Arrays;
import java.util.Random;

/**
 * Estimates summary statistics and percentiles of a stream of values from a fixed-size
 * uniform sample, maintained with reservoir sampling (Algorithm R by Jeffrey Vitter).
 * See https://en.wikipedia.org/wiki/Reservoir_sampling
 */
public class Histogram {
    /**
     * Default sampling size.
     */
    private static final int DEFAULT_SIZE = 1028;

    /**
     * Number of values recorded.
     */
    protected int count = 0;

    /**
     * Reservoir of values as per the Algorithm R.
     */
    protected long[] reservoir;

    /**
     * Random index generator.
     */
    protected final Random random = new Random();

    /**
     * The minimum value recorded.
     */
    protected long min = Long.MAX_VALUE;

    /**
     * The maximum value recorded.
     */
    protected long max = 0;

    /**
     * The total of all values.
     */
    protected long total = 0;

    public Histogram() {
        this(DEFAULT_SIZE);
    }

    public Histogram(int sampleSize) {
        this.reservoir = new long[sampleSize];
    }

    /**
     * Count of values recorded.
     *
     * @return number of values recorded
     */
    public int getCount() {
        return count;
    }

    /**
     * Update histogram with a value.
     *
     * @param value value to add
     */
    public void update(long value) {
        if (count < reservoir.length) {
            // fill the reservoir array
            reservoir[count] = value;
        } else {
            // replace elements with gradually decreasing probability
            int i = random.nextInt(count + 1);
            if (i < reservoir.length) {
                reservoir[i] = value;
            }
        }
        count++;
        total += value;
        min = Math.min(value, min);
        max = Math.max(value, max);
    }
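
    // Sampling note: Algorithm R keeps a uniform random sample of the stream, so once more
    // than reservoir.length values have been recorded, each recorded value is retained with
    // probability reservoir.length / count (with the default sample size of 1028 and
    // 1,000,000 partitions, roughly 0.1%). Percentiles read from the reservoir are therefore
    // estimates, as the README columns above state.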

    /**
     * Snapshot the histogram by sorting the sampled values.
     *
     * Must be called before reading quantiles with {@link #getValue(double)}.
     */
    public void snapshot() {
        // Only the filled portion of the reservoir holds recorded values.
        Arrays.sort(reservoir, 0, Math.min(count, reservoir.length));
    }

    /**
     * Get the minimum value.
     *
     * @return the minimum value.
     */
    public long getMin() {
        return min;
    }

    /**
     * Get the maximum value.
     *
     * @return the maximum value.
     */
    public long getMax() {
        return max;
    }

    /**
     * Returns the average value.
     *
     * @return the average value
     */
    public double getMean() {
        return total / (double) count;
    }

    /**
     * Get the total of recorded values.
     *
     * @return the total of all values
     */
    public long getTotal() {
        return total;
    }

    /**
     * Returns the value at the given quantile, using linear interpolation between the two
     * nearest sampled values.
     *
     * Snapshot the histogram before calling this method.
     *
     * @param quantile a given quantile, in {@code [0..1]}
     * @return the value in the distribution at {@code quantile}
     */
    public double getValue(double quantile) {
        if (quantile < 0.0 || quantile > 1.0 || Double.isNaN(quantile)) {
            throw new IllegalArgumentException(quantile + " is not in [0..1]");
        }

        // Only the filled portion of the reservoir holds recorded values.
        final int size = Math.min(count, reservoir.length);
        if (size == 0) {
            return 0.0;
        }

        final double pos = quantile * (size + 1);
        final int index = (int) pos;

        if (index < 1) {
            return reservoir[0];
        }

        if (index >= size) {
            return reservoir[size - 1];
        }

        final double lower = reservoir[index - 1];
        final double upper = reservoir[index];
        return lower + (pos - Math.floor(pos)) * (upper - lower);
    }
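
    // Example: with a snapshotted sample of {100, 200, 300, 400}, getValue(0.5) yields
    // pos = 0.5 * (4 + 1) = 2.5 and index = 2, interpolating halfway between
    // reservoir[1] = 200 and reservoir[2] = 300 to return 250.0.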

    /**
     * Returns the standard deviation of the sampled values.
     *
     * @return the standard deviation value
     */
    public double getStdDev() {
        // Only the filled portion of the reservoir holds recorded values.
        final int size = Math.min(count, reservoir.length);
        if (size <= 1) {
            return 0;
        }

        final double mean = getMean();
        double sum = 0;
        for (int i = 0; i < size; i++) {
            final double diff = reservoir[i] - mean;
            sum += diff * diff;
        }

        final double variance = sum / (size - 1);
        return Math.sqrt(variance);
    }
}
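
For reference, a minimal usage sketch of the new class (not part of this commit; the class name HistogramExample and the sample values are made up for illustration), following the same record, snapshot, then read pattern as the summary tools above:

import com.instaclustr.sstabletools.Histogram;

public class HistogramExample {
    public static void main(String[] args) {
        Histogram sizeHistogram = new Histogram();

        // One value per partition, e.g. its uncompressed size in bytes.
        long[] partitionSizes = {1024, 2048, 4096, 8192, 1048576};
        for (long size : partitionSizes) {
            sizeHistogram.update(size);
        }

        // Sort the sample once before reading any percentiles.
        sizeHistogram.snapshot();

        System.out.println("Count:    " + sizeHistogram.getCount());
        System.out.println("Minimum:  " + sizeHistogram.getMin());
        System.out.println("Average:  " + Math.round(sizeHistogram.getMean()));
        System.out.println("std dev.: " + Math.round(sizeHistogram.getStdDev()));
        System.out.println("50%:      " + Math.round(sizeHistogram.getValue(0.5)));
        System.out.println("99%:      " + Math.round(sizeHistogram.getValue(0.99)));
        System.out.println("Maximum:  " + sizeHistogram.getMax());
    }
}
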
@@ -104,13 +104,9 @@ public static void main(String[] args) {
            System.exit(0);
        }

-       long minSize = Long.MAX_VALUE;
-       long maxSize = 0;
+       Histogram sizeHistogram = new Histogram();
+       Histogram sstableHistogram = new Histogram();
        long partitionCount = 0;
-       long totalSize = 0;
-       int minTables = Integer.MAX_VALUE;
-       int maxTables = 0;
-       long totalTables = 0;

        MinMaxPriorityQueue<PartitionStatistics> largestPartitions = MinMaxPriorityQueue
                .orderedBy(PartitionStatistics.SIZE_COMPARATOR)
@@ -130,25 +126,30 @@ public static void main(String[] args) {
            progressBar.updateProgress(partitionReader.getProgress());
            largestPartitions.add(stat);
            tableCountLeaders.add(stat);
-           minSize = Math.min(minSize, stat.size);
-           maxSize = Math.max(maxSize, stat.size);
-           totalSize += stat.size;
-           minTables = Math.min(minTables, stat.tableCount);
-           maxTables = Math.max(maxTables, stat.tableCount);
-           totalTables += stat.tableCount;
+           sizeHistogram.update(stat.size);
+           sstableHistogram.update(stat.tableCount);
            partitionCount++;
        }
+       sizeHistogram.snapshot();
+       sstableHistogram.snapshot();

        cfProxy.close();

        System.out.println("Summary:");
        TableBuilder tb = new TableBuilder();
        tb.setHeader("", "Size", "SSTable");
        tb.addRow("Count", Long.toString(partitionCount), "");
-       tb.addRow("Total", Util.humanReadableByteCount(totalSize), Integer.toString(sstableReaders.size()));
-       tb.addRow("Minimum", Util.humanReadableByteCount(minSize), Integer.toString(minTables));
-       tb.addRow("Maximum", Util.humanReadableByteCount(maxSize), Integer.toString(maxTables));
-       tb.addRow("Average", Util.humanReadableByteCount(totalSize / partitionCount), String.format("%.1f", totalTables / (double) partitionCount));
+       tb.addRow("Total", Util.humanReadableByteCount(sizeHistogram.getTotal()), Integer.toString(sstableReaders.size()));
+       tb.addRow("Minimum", Util.humanReadableByteCount(sizeHistogram.getMin()), Long.toString(sstableHistogram.getMin()));
+       tb.addRow("Average", Util.humanReadableByteCount(Math.round(sizeHistogram.getMean())), String.format("%.1f", sstableHistogram.getMean()));
+       tb.addRow("std dev.", Util.humanReadableByteCount(Math.round(sizeHistogram.getStdDev())), String.format("%.1f", sstableHistogram.getStdDev()));
+       tb.addRow("50%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.5))), String.format("%.1f", sstableHistogram.getValue(0.5)));
+       tb.addRow("75%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.75))), String.format("%.1f", sstableHistogram.getValue(0.75)));
+       tb.addRow("90%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.9))), String.format("%.1f", sstableHistogram.getValue(0.9)));
+       tb.addRow("95%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.95))), String.format("%.1f", sstableHistogram.getValue(0.95)));
+       tb.addRow("99%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.99))), String.format("%.1f", sstableHistogram.getValue(0.99)));
+       tb.addRow("99.9%", Util.humanReadableByteCount(Math.round(sizeHistogram.getValue(0.999))), String.format("%.1f", sstableHistogram.getValue(0.999)));
+       tb.addRow("Maximum", Util.humanReadableByteCount(sizeHistogram.getMax()), Long.toString(sstableHistogram.getMax()));
        System.out.println(tb);

        System.out.println("Largest partitions:");
5 changes: 0 additions & 5 deletions src/com/instaclustr/sstabletools/SSTableStatistics.java
@@ -85,11 +85,6 @@ public int compare(SSTableStatistics o1, SSTableStatistics o2) {
     */
    public long liveCellCount = 0;

-   /**
-    * SSTable delete cell count.
-    */
-   public long deleteCellCount = 0;
-
    /**
     * SSTable expiring cell count.
     */
