diff --git a/src/main/java/org/apache/datasketches/partitions/BoundsRule.java b/src/main/java/org/apache/datasketches/partitions/BoundsRule.java index 68dc87bc1..ecda05e1b 100644 --- a/src/main/java/org/apache/datasketches/partitions/BoundsRule.java +++ b/src/main/java/org/apache/datasketches/partitions/BoundsRule.java @@ -33,5 +33,10 @@ public enum BoundsRule { /** * Include only the lower bound but not the upper bound */ - INCLUDE_LOWER + INCLUDE_LOWER, + /** + * Include none + */ + INCLUDE_NEITHER; + } diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java b/src/main/java/org/apache/datasketches/partitions/Partitioner.java index 9bc3eeec5..be1247ca3 100644 --- a/src/main/java/org/apache/datasketches/partitions/Partitioner.java +++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java @@ -26,6 +26,10 @@ import static java.lang.Math.pow; import static java.lang.Math.round; import static java.util.Collections.unmodifiableList; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_LOWER; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_NEITHER; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; @@ -184,33 +188,35 @@ public static class PartitionBoundsRow { public PartitionBoundsRow(final StackElement se) { final GenericPartitionBoundaries gpb = se.gpb; - this.part = se.part; - this.levelPartId = se.levelPartId + "." + part; final QuantileSearchCriteria searchCrit = gpb.getSearchCriteria(); final T[] boundaries = gpb.getBoundaries(); final int numParts = gpb.getNumPartitions(); + this.part = se.part; + this.levelPartId = se.levelPartId + "." 
+ part; + final long num; + this.approxNumDeltaItems = num = gpb.getNumDeltaItems()[part]; if (searchCrit == INCLUSIVE) { if (part == 1) { lowerBound = gpb.getMinItem(); upperBound = boundaries[part]; - rule = BoundsRule.INCLUDE_BOTH; + rule = (num == 0) ? INCLUDE_NEITHER : lowerBound.equals(upperBound) ? INCLUDE_UPPER : INCLUDE_BOTH; } else { lowerBound = boundaries[part - 1]; upperBound = boundaries[part]; - rule = BoundsRule.INCLUDE_UPPER; + rule = (num == 0) ? INCLUDE_NEITHER : INCLUDE_UPPER; } - } else { //EXCLUSIVE + } + else { //EXCLUSIVE if (part == numParts) { lowerBound = boundaries[part - 1]; upperBound = gpb.getMaxItem(); - rule = BoundsRule.INCLUDE_BOTH; + rule = (num == 0) ? INCLUDE_NEITHER : lowerBound.equals(upperBound) ? INCLUDE_LOWER : INCLUDE_BOTH; } else { lowerBound = boundaries[part - 1]; upperBound = boundaries[part]; - rule = BoundsRule.INCLUDE_LOWER; + rule = (num == 0) ? INCLUDE_NEITHER : INCLUDE_LOWER; } } - approxNumDeltaItems = gpb.getNumDeltaItems()[part]; } } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index 9638a9a9e..93c956057 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -19,6 +19,7 @@ package org.apache.datasketches.quantiles; +import static org.apache.datasketches.quantiles.ClassicUtil.getNormalizedRankError; import static org.apache.datasketches.quantilescommon.GenericInequalitySearch.find; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; @@ -55,6 +56,7 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition private final T maxItem; private final T minItem; private final Class clazz; + private final int k; /** * Construct from elements for testing.
@@ -70,7 +72,8 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition final long totalN, final Comparator comparator, final T maxItem, - final T minItem) { + final T minItem, + final int k) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; @@ -78,6 +81,7 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition this.maxItem = maxItem; this.minItem = minItem; this.clazz = (Class)quantiles[0].getClass(); + this.k = k; } /** @@ -88,14 +92,14 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition ItemsSketchSortedView(final ItemsSketch sketch) { if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } this.totalN = sketch.getN(); - final int k = sketch.getK(); final int numQuantiles = sketch.getNumRetained(); this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); this.minItem = sketch.minItem_; this.maxItem = sketch.maxItem_; - cumWeights = new long[numQuantiles]; - comparator = sketch.getComparator(); - clazz = sketch.clazz; + this.cumWeights = new long[numQuantiles]; + this.comparator = sketch.getComparator(); + this.clazz = sketch.clazz; + this.k = sketch.getK(); final Object[] combinedBuffer = sketch.getCombinedBuffer(); final int baseBufferCount = sketch.getBaseBufferCount(); @@ -155,27 +159,84 @@ public GenericPartitionBoundaries getPartitionBoundaries(final int numEqually final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } final long totalN = this.totalN; + final double delta = getNormalizedRankError(k, true) * totalN; + final int maxParts = (int) (totalN / Math.ceil(delta * 2) ); final int svLen = cumWeights.length; - //adjust ends of sortedView arrays - cumWeights[0] = 1L; - cumWeights[svLen - 1] = totalN; - quantiles[0] = this.getMinItem(); - quantiles[svLen - 1] = this.getMaxItem(); - - final double[] evSpNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySized + 
1); - final int len = evSpNormRanks.length; - final T[] evSpQuantiles = (T[]) Array.newInstance(clazz, len); - final long[] evSpNatRanks = new long[len]; - for (int i = 0; i < len; i++) { - final int index = getQuantileIndex(evSpNormRanks[i], searchCrit); - evSpQuantiles[i] = quantiles[index]; - evSpNatRanks[i] = cumWeights[index]; + if (numEquallySized > maxParts) { + throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG + + "The requested number of partitions is too large for the 'k' of this sketch " + + "if it exceeds the maximum number of partitions allowed by the error threshold for the 'k' of this sketch. " + + "Requested Partitions: " + numEquallySized + " > " + maxParts); + } + if (numEquallySized > svLen / 2.0) { + throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG + + "The requested number of partitions is too large for the number of retained items " + + "if it exceeds the maximum number of retained items divided by 2. " + + "Requested Partitions: " + numEquallySized + " > " + + "Retained Items / 2: " + (svLen / 2.0)); } + + final double[] searchNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySized + 1); + final int partArrLen = searchNormRanks.length; + final T[] partQuantiles = (T[]) Array.newInstance(clazz, partArrLen); + final long[] partNatRanks = new long[partArrLen]; + final double[] partNormRanks = new double[partArrLen]; + + //Adjust End Points: The ends of the Sorted View arrays may be missing the actual MinItem and MaxItem bounds, + // which are absolutely required when partitioning, especially inner partitions. + + //Are the minItem and maxItem already in place? + int adjLen = svLen; //this will be the length of the local copies of quantiles and cumWeights + final boolean adjLow = comparator.compare(quantiles[0], minItem) != 0; //if true, adjust the low end + final boolean adjHigh = comparator.compare(quantiles[svLen - 1], maxItem) != 0; //if true, adjust the high end + adjLen += adjLow ? 1 : 0; + adjLen += adjHigh ? 
1 : 0; + + //These are local copies of the quantiles and cumWeights arrays just for partitioning. + //The rest of the SV remains unchanged. + final T[] adjQuantiles; + final long[] adjCumWeights; + if (adjLen > svLen) { //is any adjustment required at all? + adjQuantiles = (T[]) new Object[adjLen]; + adjCumWeights = new long[adjLen]; + final int offset = adjLow ? 1 : 0; + System.arraycopy(quantiles, 0, adjQuantiles, offset, svLen); + System.arraycopy(cumWeights,0, adjCumWeights, offset, svLen); + + //Adjust the low end if required. + if (adjLow) { + adjQuantiles[0] = minItem; + adjCumWeights[0] = 1; + } + //When inserting a MaxItem, if required, we can't just add it at the top of the quantiles array, + // we have to adjust the cumulative weight of the item just before it as well so that the maximum cumulative + // weight at the upper end still equals totalN. (This is not the case at the low end. Quiz #1: Why? ) + // If the maxItem is missing, the quantile that is currently in the top + // position must have a weight >= 2. (Quiz #2: Why?). Thus, it is safe to subtract 1. + if (adjHigh) { + adjQuantiles[adjLen - 1] = maxItem; + adjCumWeights[adjLen - 1] = cumWeights[svLen - 1]; + adjCumWeights[adjLen - 2] = cumWeights[svLen - 1] - 1; + } + } else { //both min and max are already in place, no adjustments are required. + adjQuantiles = quantiles; + adjCumWeights = cumWeights; + } //END of Adjust End Points + + //compute the quantiles and natural and normalized ranks for the partition boundaries. + for (int i = 0; i < partArrLen; i++) { + final int index = getQuantileIndex(searchNormRanks[i], adjCumWeights, searchCrit); + partQuantiles[i] = adjQuantiles[index]; + final long cumWt = adjCumWeights[index]; + partNatRanks[i] = cumWt; + partNormRanks[i] = (double)cumWt / totalN; + } + //Return the GPB of the complete specification of the boundaries. 
final GenericPartitionBoundaries gpb = new GenericPartitionBoundaries<>( this.totalN, - evSpQuantiles, - evSpNatRanks, - evSpNormRanks, + partQuantiles, + partNatRanks, + partNormRanks, getMaxItem(), getMinItem(), searchCrit); @@ -198,13 +259,14 @@ public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searc public T getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); - final int index = getQuantileIndex(rank, searchCrit); + final int index = getQuantileIndex(rank, cumWeights, searchCrit); return quantiles[index]; } - private int getQuantileIndex(final double rank, final QuantileSearchCriteria searchCrit) { + private int getQuantileIndex(final double normRank, final long[] cumWeights, + final QuantileSearchCriteria searchCrit) { final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN, searchCrit); + final double naturalRank = getNaturalRank(normRank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { return len - 1; } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsUtil.java b/src/main/java/org/apache/datasketches/quantiles/ItemsUtil.java index bf42dd18c..8138464c3 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsUtil.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsUtil.java @@ -20,6 +20,9 @@ package org.apache.datasketches.quantiles; import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.quantiles.ClassicUtil.computeNumLevelsNeeded; +import static org.apache.datasketches.quantiles.ClassicUtil.computeValidLevels; +import static org.apache.datasketches.quantiles.ClassicUtil.getNormalizedRankError; import java.util.Arrays; @@ -85,7 +88,7 @@ static String toString(final boolean sketchSummary, final boolean dataDetail final long bitPattern = sketch.getBitPattern(); if (dataDetail) { - sb.append(ClassicUtil.LS).append("### ").append(thisSimpleName).append(" DATA DETAIL: ").append(ClassicUtil.LS); + sb.append(LS).append("### ").append(thisSimpleName).append(" DATA DETAIL: ").append(LS); final Object[] items = sketch.getCombinedBuffer(); //output the base buffer @@ -95,7 +98,7 @@ static String toString(final boolean sketchSummary, final boolean dataDetail sb.append(' ').append(items[i]); } } - sb.append(ClassicUtil.LS); + sb.append(LS); //output all the levels final int numItems = combAllocCount; if (numItems > (2 * k)) { @@ -105,46 +108,45 @@ static String toString(final boolean sketchSummary, final boolean dataDetail final int levelNum = j > (2 * k) ? (j - (2 * k)) / k : 0; final String validLvl = ((1L << levelNum) & bitPattern) > 0 ? 
" T " : " F "; final String lvl = String.format("%5d", levelNum); - sb.append(ClassicUtil.LS).append(" ").append(validLvl).append(" ").append(lvl).append(":"); + sb.append(LS).append(" ").append(validLvl).append(" ").append(lvl).append(":"); } sb.append(' ').append(items[j]); } - sb.append(ClassicUtil.LS); + sb.append(LS); } - sb.append("### END DATA DETAIL").append(ClassicUtil.LS); + sb.append("### END DATA DETAIL").append(LS); } if (sketchSummary) { final long n = sketch.getN(); final String nStr = String.format("%,d", n); - final int numLevels = ClassicUtil.computeNumLevelsNeeded(k, n); - final String bufCntStr = String.format("%,d", combAllocCount); + final String baseBufCntStr = String.format("%,d", bbCount); + final String cBufCntStr = String.format("%,d", combAllocCount); + final int totNumLevels = computeNumLevelsNeeded(k, n); + final int numValidSamples = sketch.getNumRetained(); + final String numValidSampStr = String.format("%,d", numValidSamples); final int preBytes = sketch.isEmpty() ? Long.BYTES : 2 * Long.BYTES; - final double epsPmf = ClassicUtil.getNormalizedRankError(k, true); + final double epsPmf = getNormalizedRankError(k, true); final String epsPmfPctStr = String.format("%.3f%%", epsPmf * 100.0); - final double eps = ClassicUtil.getNormalizedRankError(k, false); + final double eps = getNormalizedRankError(k, false); final String epsPctStr = String.format("%.3f%%", eps * 100.0); - final int numSamples = sketch.getNumRetained(); - final String numSampStr = String.format("%,d", numSamples); final T minItem = sketch.isEmpty() ? null : sketch.getMinItem(); final T maxItem = sketch.isEmpty() ? 
null : sketch.getMaxItem(); - sb.append(ClassicUtil.LS).append("### ").append(thisSimpleName).append(" SUMMARY: ").append(ClassicUtil.LS); - sb.append(" K : ").append(k).append(ClassicUtil.LS); - sb.append(" N : ").append(nStr).append(ClassicUtil.LS); - sb.append(" BaseBufferCount : ").append(bbCount).append(ClassicUtil.LS); - sb.append(" CombinedBufferAllocatedCount : ").append(bufCntStr).append(ClassicUtil.LS); - sb.append(" Total Levels : ").append(numLevels).append(ClassicUtil.LS); - sb.append(" Valid Levels : ").append(ClassicUtil.computeValidLevels(bitPattern)) - .append(ClassicUtil.LS); - sb.append(" Level Bit Pattern : ").append(Long.toBinaryString(bitPattern)) - .append(ClassicUtil.LS); - sb.append(" Valid Samples : ").append(numSampStr).append(ClassicUtil.LS); - sb.append(" Preamble Bytes : ").append(preBytes).append(ClassicUtil.LS); + sb.append(LS).append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS); + sb.append(" K : ").append(k).append(LS); + sb.append(" N : ").append(nStr).append(LS); + sb.append(" BaseBufferCount : ").append(baseBufCntStr).append(LS); + sb.append(" CombinedBufferAllocatedCount : ").append(cBufCntStr).append(LS); + sb.append(" Total Levels : ").append(totNumLevels).append(LS); + sb.append(" Valid Levels : ").append(computeValidLevels(bitPattern)).append(LS); + sb.append(" Level Bit Pattern : ").append(Long.toBinaryString(bitPattern)).append(LS); + sb.append(" Valid Samples : ").append(numValidSampStr).append(LS); + sb.append(" Preamble Bytes : ").append(preBytes).append(LS); sb.append(" Normalized Rank Error : ").append(epsPctStr).append(LS); sb.append(" Normalized Rank Error (PMF) : ").append(epsPmfPctStr).append(LS); - sb.append(" Min Quantile : ").append(minItem).append(ClassicUtil.LS); - sb.append(" Max Quantile : ").append(maxItem).append(ClassicUtil.LS); - sb.append("### END SKETCH SUMMARY").append(ClassicUtil.LS); + sb.append(" Min Quantile : ").append(minItem).append(LS); + sb.append(" Max Quantile : 
").append(maxItem).append(LS); + sb.append("### END SKETCH SUMMARY").append(LS); } return sb.toString(); } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java index 5a5c00e26..fcfefa12f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java @@ -19,6 +19,8 @@ package org.apache.datasketches.quantilescommon; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + /** * Iterator over quantile sketches of generic type. * @param The generic quantile type @@ -32,10 +34,10 @@ public GenericSortedViewIterator(final T[] quantiles, final long[] cumWeights) { } /** - * Gets the quantile at the current index. + * Gets the quantile at the current index + * This is equivalent to getQuantile(INCLUSIVE). * - *

Don't call this before calling next() for the first time - * or after getting false from next().

+ *

Don't call this before calling next() for the first time or after getting false from next().

* * @return the quantile at the current index. */ @@ -43,4 +45,21 @@ public T getQuantile() { return quantiles[index]; } + /** + * Gets the quantile at the current index (or previous index) + * based on the chosen search criterion. + * + *

Don't call this before calling next() for the first time or after getting false from next().

+ * + * @param searchCrit if INCLUSIVE, includes the quantile at the current index. + * Otherwise, returns the quantile of the previous index. + * + * @return the quantile at the current index (or previous index) + * based on the chosen search criterion. If the chosen search criterion is EXCLUSIVE and + * the current index is at zero, this will return null. + */ + public T getQuantile(final QuantileSearchCriteria searchCrit) { + if (searchCrit == INCLUSIVE) { return quantiles[index]; } + return (index == 0) ? null : quantiles[index - 1]; + } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java index 06c298d4e..01af51eab 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java @@ -48,17 +48,29 @@ public class SortedViewIterator { index = -1; } + /** + * Gets the natural rank at the current index. + * This is equivalent to getNaturalRank(INCLUSIVE). + * + *

Don't call this before calling next() for the first time or after getting false from next().

+ * + * @return the natural rank at the current index. + */ + public long getNaturalRank() { + return cumWeights[index]; + } + /** * Gets the natural rank at the current index (or previous index) based on the chosen search criterion. * This is also referred to as the "cumulative weight". The natural rank is a number in the range [1, N], * where N ({@link #getN()}) is the total number of items fed to the sketch. * - *

Don't call this before calling next() for the first time - * or after getting false from next().

+ *

Don't call this before calling next() for the first time or after getting false from next().

* * @param searchCrit if INCLUSIVE, includes the weight of the item at the current index in the computation of * the natural rank. * Otherwise, it will return the natural rank of the previous index. + * * @return the natural rank at the current index (or previous index) based on the chosen search criterion. */ public long getNaturalRank(final QuantileSearchCriteria searchCrit) { @@ -74,16 +86,28 @@ public long getN() { return totalN; } + /** + * Gets the normalized rank at the current index. + * This is equivalent to getNormalizedRank(INCLUSIVE). + * + *

Don't call this before calling next() for the first time or after getting false from next().

+ * + * @return the normalized rank at the current index + */ + public double getNormalizedRank() { + return (double) getNaturalRank() / totalN; + } + /** * Gets the normalized rank at the current index (or previous index) * based on the chosen search criterion. Where normalized rank = natural rank / N ({@link #getN()}) * and is a fraction in the range (0,1.0]. * - *

Don't call this before calling next() for the first time - * or after getting false from next().

+ *

Don't call this before calling next() for the first time or after getting false from next().

* * @param searchCrit if INCLUSIVE, includes the normalized rank at the current index. * Otherwise, returns the normalized rank of the previous index. + * * @return the normalized rank at the current index (or previous index) * based on the chosen search criterion. */ @@ -94,8 +118,7 @@ public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { /** * Gets the weight contribution of the item at the current index. * - *

Don't call this before calling next() for the first time - * or after getting false from next().

+ *

Don't call this before calling next() for the first time or after getting false from next().

* * @return the weight contribution of the item at the current index. */ diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java new file mode 100644 index 000000000..03c41776b --- /dev/null +++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.quantiles; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; + +import java.util.Comparator; + +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.GenericSortedViewIterator; +import org.testng.annotations.Test; + +public class ItemsSketchPartitionBoundariesTest { + private static final int k = 128; + + @Test + public void checkSimpleEndsAdjustment() { + final String[] quantiles = {"2","4","6","7"}; + final long[] cumWeights = {2, 4, 6, 8}; + final long totalN = 8; + final Comparator comparator = Comparator.naturalOrder(); + final String maxItem = "8"; + final String minItem = "1"; + ItemsSketchSortedViewString sv = new ItemsSketchSortedViewString( + quantiles, cumWeights, totalN, comparator, maxItem, minItem, k); + + GenericSortedViewIterator itr = sv.iterator(); + while (itr.next()) { + println(itr.getNaturalRank(INCLUSIVE) + ", " + itr.getQuantile(INCLUSIVE)); + } + GenericPartitionBoundaries gpb = sv.getPartitionBoundaries(2); + String[] boundaries = gpb.getBoundaries(); + long[] natRanks = gpb.getNaturalRanks(); + double[] normRanks = gpb.getNormalizedRanks(); + long[] deltaItems = gpb.getNumDeltaItems(); + int numParts = gpb.getNumPartitions(); + String maxItm = gpb.getMaxItem(); + String minItm = gpb.getMinItem(); + assertEquals(boundaries, new String[] {"1","4","8"}); + assertEquals(natRanks, new long[] {1,4,8}); + assertEquals(normRanks, new double[] {.125,.5,1.0}); + assertEquals(deltaItems, new long[] {0,4,4}); + assertEquals(numParts, 2); + assertEquals(maxItm, "8"); + assertEquals(minItm, "1"); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + static final void printf(final String 
format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java index 0dbf5bde2..09a8accdb 100644 --- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java +++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java @@ -32,7 +32,8 @@ public ItemsSketchSortedViewString( final long totalN, final Comparator comparator, final String maxItem, - final String minItem) { - super(quantiles, cumWeights, totalN, comparator, maxItem, minItem); + final String minItem, + final int k) { + super(quantiles, cumWeights, totalN, comparator, maxItem, minItem, k); } } diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java index 6d1b0d0c5..3b458a56f 100644 --- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.quantiles; +import static org.apache.datasketches.quantiles.PreambleUtil.DEFAULT_K; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.testng.Assert.assertEquals; @@ -618,7 +619,7 @@ public void sortedView2() { Double[] qArr = {8.0, 10.0, 10.0, 20.0}; long[] cwArr = {1, 3, 4, 5}; Comparator comp = Comparator.naturalOrder(); - ItemsSketchSortedView sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp, 20.0, 8.0); + ItemsSketchSortedView sv = new ItemsSketchSortedView<>(qArr, cwArr, 
5L, comp, 20.0, 8.0, DEFAULT_K); double[] ranks = {0, .1, .2, .3, .6, .7, .8, .9, 1.0}; Double[] qOut = new Double[9]; for (int i = 0; i < ranks.length; i++) { diff --git a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java index 2cbc006b2..d66bf5f3e 100644 --- a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java +++ b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java @@ -349,7 +349,7 @@ private void buildSVs(int set) throws Exception { kllItemsSV = new KllItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set], comparator, svImax, svImin); itemsSV = new ItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set], - comparator, svImax, svImin); + comparator, svImax, svImin, k); } private final static ReqSketchSortedView getRawReqSV( diff --git a/src/test/java/org/apache/datasketches/sampling/EbppsSketchTest.java b/src/test/java/org/apache/datasketches/sampling/EbppsSketchTest.java index e6c151125..8980b84f2 100644 --- a/src/test/java/org/apache/datasketches/sampling/EbppsSketchTest.java +++ b/src/test/java/org/apache/datasketches/sampling/EbppsSketchTest.java @@ -65,7 +65,7 @@ static void checkIfEqual(EbppsItemsSketch sk1, EbppsItemsSketch sk2) { assertTrue(sample1 != null && sample2 != null); final int len = Math.min(sample1.size(), sample2.size()); for (int i = 0; i < len; ++i) { - assertEquals(sample1.get(i), sample2.get(i)); + assertEquals(sample1.get(i), sample2.get(i)); } assertTrue((len == Math.floor(sk1.getC()) || len == Math.ceil(sk1.getC()))); @@ -201,7 +201,7 @@ public void serializeDeserializeString() { mem = Memory.wrap(bytes); sk_heapify = EbppsItemsSketch.heapify(mem, new ArrayOfStringsSerDe()); checkIfEqual(sk, sk_heapify); - + // non-empty with partial item sk.update(Integer.toString(2 * k), 2.5); assertEquals(sk.getCumulativeWeight(), 
k + 2.5, EPS); @@ -305,6 +305,6 @@ public void deserializeTooShort() { final byte[] bytes = sk.toByteArray(new ArrayOfLongsSerDe()); final WritableMemory mem = WritableMemory.writableWrap(bytes); final Memory shortMem = mem.region(0, mem.getCapacity() - 1); - EbppsItemsSketch.heapify(shortMem, new ArrayOfStringsSerDe()); + EbppsItemsSketch.heapify(shortMem, new ArrayOfLongsSerDe()); } }