Skip to content

[feature] use binary indexed tree to optimize histogram #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/main/java/simpledb/optimizer/BinaryIndexedTree.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package simpledb.optimizer;

import javafx.scene.SubScene;

/**
 * Binary indexed (Fenwick) tree over the 1-based positions {@code 1..n}.
 *
 * <p>Supports point updates ({@link #add}) and prefix-sum queries
 * ({@link #query}) in O(log n) each. Position {@code i} of the backing array
 * stores the sum of a block of positions ending at {@code i} whose length is
 * the lowest set bit of {@code i}.
 *
 * <p>This class is not synchronized.
 */
public class BinaryIndexedTree {
    /** Backing array; slot 0 is unused so positions can be 1-based. */
    final int[] tr;
    /** Running sum of every value added so far; always equal to {@code query(n)}. */
    int cnt;
    /** Number of addressable positions. */
    final int n;

    /**
     * Creates an empty tree with positions {@code 1..n}.
     *
     * @param n number of positions; must be non-negative
     * @throws IllegalArgumentException if {@code n} is negative
     */
    public BinaryIndexedTree(int n) {
        if (n < 0) {
            throw new IllegalArgumentException("n must be non-negative: " + n);
        }
        this.tr = new int[n + 1];
        this.n = n;
        this.cnt = 0;
    }

    /**
     * Adds {@code val} to the value stored at position {@code ind}.
     *
     * @param ind 1-based position, {@code 1 <= ind <= n}
     * @param val amount to add (may be negative)
     * @throws IndexOutOfBoundsException if {@code ind} is outside {@code 1..n}
     */
    public void add(int ind, int val) {
        // Position 0 must be rejected explicitly: its lowest set bit is 0, so
        // the update loop below would never advance and would spin forever.
        if (ind < 1 || ind > n) {
            throw new IndexOutOfBoundsException("index " + ind + " is outside 1.." + n);
        }
        for (int i = ind; i <= n; i += i & -i) {
            tr[i] += val;
        }
        // Track the total by value, not by number of calls, so that the
        // shortcut in query() stays correct for any val.
        cnt += val;
    }

    /**
     * Returns the sum of the values at positions {@code 1..ind}.
     *
     * @param ind inclusive upper position; values {@code <= 0} yield 0 and
     *            values {@code > n} yield the total of all positions
     * @return the prefix sum
     */
    public long query(int ind) {
        if (ind > n) {
            return cnt;
        }
        long res = 0;
        for (int i = ind; i > 0; i -= i & -i) {
            res += tr[i];
        }
        return res;
    }

    /**
     * Returns the sum of the values at positions {@code left..right}, inclusive.
     *
     * @param left  inclusive lower position
     * @param right inclusive upper position
     * @return {@code query(right) - query(left - 1)}
     */
    public long range(int left, int right) {
        // Guard left <= 0 so that left - 1 cannot wrap around to a huge index.
        long below = left <= 0 ? 0 : query(left - 1);
        return query(right) - below;
    }

    /**
     * Returns the value stored at a single position.
     *
     * @param ind 1-based position
     * @return the value at {@code ind}, or 0 when {@code ind <= 0}
     */
    public long index(int ind) {
        if (ind <= 0) {
            return 0;
        }
        return query(ind) - query(ind - 1);
    }
}
41 changes: 19 additions & 22 deletions src/main/java/simpledb/optimizer/IntHistogram.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,24 @@ public class IntHistogram implements Histogram<Integer> {

private int maxVal;
private int minVal;
private int[] heights;
private int buckets;
private int totalTuples;
private int width;
private int lastBucketWidth;
private BinaryIndexedTree bit;

/**
* Create a new IntHistogram.
*
*
* This IntHistogram should maintain a histogram of integer values that it receives.
* It should split the histogram into "buckets" buckets.
*
*
* The values that are being histogrammed will be provided one-at-a-time through the "addValue()" function.
*
*
* Your implementation should use space and have execution time that are both
* constant with respect to the number of values being histogrammed. For example, you shouldn't
* constant with respect to the number of values being histogrammed. For example, you shouldn't
* simply store every value that you see in a sorted list.
*
*
* @param buckets The number of buckets to split the input value into.
* @param min The minimum integer value that will ever be passed to this class for histogramming
* @param max The maximum integer value that will ever be passed to this class for histogramming
Expand All @@ -37,7 +37,7 @@ public IntHistogram(int buckets, int min, int max) {
this.minVal = min;
this.maxVal = max;
this.buckets = buckets;
this.heights = new int[buckets];
this.bit = new BinaryIndexedTree(buckets);
int total = max - min + 1;
this.width = Math.max(total / buckets, 1);
this.lastBucketWidth = total - (this.width * (buckets - 1));
Expand All @@ -53,12 +53,13 @@ public void addValue(Integer v) {
if (v < this.minVal || v > this.maxVal) {
return;
}

int bucketIndex = (v - this.minVal) / this.width;
if (bucketIndex >= this.buckets) {
return;
}
this.totalTuples++;
this.heights[bucketIndex]++;
this.bit.add(bucketIndex + 1, 1);
}

private double estimateGreater(int bucketIndex, int predicateValue, int bucketWidth) {
Expand All @@ -72,12 +73,8 @@ private double estimateGreater(int bucketIndex, int predicateValue, int bucketWi
// As the lab3 doc, result = ((right - val) / bucketWidth) * (bucketTuples / totalTuples)
int bucketRight = bucketIndex * this.width + this.minVal;
double bucketRatio = (bucketRight - predicateValue) * 1.0 / bucketWidth;
double result = bucketRatio * (this.heights[bucketIndex] * 1.0 / this.totalTuples);

int sum = 0;
for (int i = bucketIndex + 1; i < this.buckets; i++) {
sum += this.heights[i];
}
double result = bucketRatio * ((bit.index(bucketIndex + 1) * 1.0) / totalTuples);
int sum = (int) (bit.cnt - bit.query(bucketIndex + 1));
return (sum * 1.0) / this.totalTuples + result;
}

Expand All @@ -86,18 +83,18 @@ private double estimateEqual(int bucketIndex, int predicateValue, int bucketWidt
return 0;
}
// As the lab3 doc, result = (bucketHeight / bucketWidth) / totalTuples
double result = this.heights[bucketIndex];
double result = bit.index(bucketIndex + 1);
result = result / bucketWidth;
result = result / this.totalTuples;
return result;
}

/**
* Estimate the selectivity of a particular predicate and operand on this table.
*
* For example, if "op" is "GREATER_THAN" and "v" is 5,
*
* For example, if "op" is "GREATER_THAN" and "v" is 5,
* return your estimate of the fraction of elements that are greater than 5.
*
*
* @param op Operator
* @param v Value
* @return Predicted selectivity of this particular operator and value
Expand Down Expand Up @@ -134,7 +131,7 @@ public double estimateSelectivity(Predicate.Op op, Integer v) {
/**
* @return
* the average selectivity of this histogram.
*
*
* This is not an indispensable method to implement the basic
* join optimization. It may be needed if you want to
* implement a more efficient optimization
Expand All @@ -149,8 +146,8 @@ public double avgSelectivity() {
*/
@Override
public String toString() {
return "IntHistogram{" + "maxVal=" + maxVal + ", minVal=" + minVal + ", heights=" + Arrays.toString(heights)
+ ", buckets=" + buckets + ", totalTuples=" + totalTuples + ", width=" + width + ", lastBucketWidth="
+ lastBucketWidth + '}';
return "IntHistogram{" + "maxVal=" + maxVal + ", minVal=" + minVal + ", heights="
+ ", buckets=" + buckets + ", totalTuples=" + totalTuples + ", width=" + width + ", lastBucketWidth="
+ lastBucketWidth + '}';
}
}