Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@

import com.clearspring.analytics.stream.cardinality.HyperLogLog;
import java.math.BigDecimal;
import java.util.Base64;
import javax.annotation.Nullable;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.AnotB;
import org.apache.datasketches.theta.Intersection;
import org.apache.datasketches.theta.SetOperationBuilder;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Sketches;
import org.apache.datasketches.theta.Union;
import org.apache.datasketches.theta.UpdateSketch;
import org.apache.datasketches.tuple.aninteger.IntegerSketch;
import org.apache.datasketches.tuple.aninteger.IntegerSummary;
Expand Down Expand Up @@ -65,6 +72,8 @@
* }
*/
public class SketchFunctions {
private static final SetOperationBuilder SET_OPERATION_BUILDER = new SetOperationBuilder();

private SketchFunctions() {
}

Expand Down Expand Up @@ -184,4 +193,87 @@ public static byte[] toIntegerSumTupleSketch(@Nullable Object key, Integer value
}
return ObjectSerDeUtils.DATA_SKETCH_INT_TUPLE_SER_DE.serialize(is.compact());
}

@ScalarFunction(names = {"getThetaSketchEstimate", "get_theta_sketch_estimate"})
public static long getThetaSketchEstimate(Object sketchObject) {
return Math.round(asThetaSketch(sketchObject).getEstimate());
}

@ScalarFunction(names = {"thetaSketchUnion", "theta_sketch_union"})
public static Sketch thetaSketchUnion(Object o1, Object o2) {
return thetaSketchUnionVar(o1, o2);
}

@ScalarFunction(names = {"thetaSketchUnion", "theta_sketch_union"})
public static Sketch thetaSketchUnion(Object o1, Object o2, Object o3) {
return thetaSketchUnionVar(o1, o2, o3);
}

@ScalarFunction(names = {"thetaSketchUnion", "theta_sketch_union"})
public static Sketch thetaSketchUnion(Object o1, Object o2, Object o3, Object o4) {
return thetaSketchUnionVar(o1, o2, o3, o4);
}

@ScalarFunction(names = {"thetaSketchUnion", "theta_sketch_union"})
public static Sketch thetaSketchUnion(Object o1, Object o2, Object o3, Object o4, Object o5) {
return thetaSketchUnionVar(o1, o2, o3, o4, o5);
}

@ScalarFunction(names = {"thetaSketchIntersect", "theta_sketch_intersect"})
public static Sketch thetaSketchIntersect(Object o1, Object o2) {
return thetaSketchIntersectVar(o1, o2);
}

@ScalarFunction(names = {"thetaSketchIntersect", "theta_sketch_intersect"})
public static Sketch thetaSketchIntersect(Object o1, Object o2, Object o3) {
return thetaSketchIntersectVar(o1, o2, o3);
}

@ScalarFunction(names = {"thetaSketchIntersect", "theta_sketch_intersect"})
public static Sketch thetaSketchIntersect(Object o1, Object o2, Object o3, Object o4) {
return thetaSketchIntersectVar(o1, o2, o3, o4);
}

@ScalarFunction(names = {"thetaSketchIntersect", "theta_sketch_intersect"})
public static Sketch thetaSketchIntersect(Object o1, Object o2, Object o3, Object o4, Object o5) {
return thetaSketchIntersectVar(o1, o2, o3, o4, o5);
}

@ScalarFunction(names = {"thetaSketchDiff", "theta_sketch_diff"})
public static Sketch thetaSketchDiff(Object sketchObjectA, Object sketchObjectB) {
AnotB diff = SET_OPERATION_BUILDER.buildANotB();
diff.setA(asThetaSketch(sketchObjectA));
diff.notB(asThetaSketch(sketchObjectB));
return diff.getResult(false, null, false);
}

private static Sketch thetaSketchUnionVar(Object... sketchObjects) {
Union union = SET_OPERATION_BUILDER.buildUnion();
for (Object sketchObj : sketchObjects) {
union.union(asThetaSketch(sketchObj));
}
return union.getResult(false, null);
}

private static Sketch thetaSketchIntersectVar(Object... sketchObjects) {
Intersection intersection = SET_OPERATION_BUILDER.buildIntersection();
for (Object sketchObj : sketchObjects) {
intersection.intersect(asThetaSketch(sketchObj));
}
return intersection.getResult(false, null);
}

private static Sketch asThetaSketch(Object sketchObj) {
if (sketchObj instanceof String) {
byte[] decoded = Base64.getDecoder().decode((String) sketchObj);
return Sketches.wrapSketch(Memory.wrap((decoded)));
} else if (sketchObj instanceof Sketch) {
return (Sketch) sketchObj;
} else if (sketchObj instanceof byte[]) {
return Sketches.wrapSketch(Memory.wrap((byte[]) sketchObj));
} else {
throw new RuntimeException("Exception occurred getting estimate from Theta Sketch, unsupported Object type: "
+ sketchObj.getClass());
}
}
}
20 changes: 20 additions & 0 deletions pinot-query-runtime/src/test/resources/queries/UDFAggregates.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,26 @@
{
"sql": "SELECT SUMPRECISION(decimal_col) FROM {tbl}",
"outputs": [["10000000000100000000110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000001"]]
},
{
"sql": "SELECT string_col, SUMPRECISION(decimal_col) FROM {tbl} GROUP BY string_col",
"outputs": [["a" ,"10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"], ["b", "100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"], ["c", "10000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ string_col, SUMPRECISION(decimal_col) FROM {tbl} GROUP BY string_col",
"outputs": [["a" ,"10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"], ["b", "100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"], ["c", "10000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001"]]
},
{
"sql": "select GET_THETA_SKETCH_ESTIMATE(DISTINCT_COUNT_RAW_THETA_SKETCH(string_col, 'nominalEntries=16')), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF(DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''))), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION(DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(long_col, ''))), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(DISTINCT_COUNT_RAW_THETA_SKETCH(double_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(float_col, 'nominalEntries=1000'))) FROM {tbl}",
"outputs": [[3, 0, 6, 6]]
},
{
"sql": "select bool_col, GET_THETA_SKETCH_ESTIMATE(DISTINCT_COUNT_RAW_THETA_SKETCH(string_col, 'nominalEntries=16')), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF(DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''))), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION(DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(long_col, ''))), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(DISTINCT_COUNT_RAW_THETA_SKETCH(double_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(float_col, 'nominalEntries=1000'))) FROM {tbl} GROUP BY bool_col",
"outputs": [[true, 2, 0, 2, 3], [false, 2, 0, 4, 3]]
},
{
"sql": "select /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ bool_col, GET_THETA_SKETCH_ESTIMATE(DISTINCT_COUNT_RAW_THETA_SKETCH(string_col, 'nominalEntries=16')), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF(DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''))), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION(DISTINCT_COUNT_RAW_THETA_SKETCH(int_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(long_col, ''))), GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(DISTINCT_COUNT_RAW_THETA_SKETCH(double_col, ''), DISTINCT_COUNT_RAW_THETA_SKETCH(float_col, 'nominalEntries=1000'))) FROM {tbl} GROUP BY bool_col",
"outputs": [[true, 2, 0, 2, 3], [false, 2, 0, 4, 3]]
}
]
}
Expand Down