Skip to content

Commit

Permalink
Merge pull request #460 from apache/cpp_serde_compat_testing
Browse files Browse the repository at this point in the history
generate serialized KLL sketches for C++
  • Loading branch information
leerho authored Jul 24, 2023
2 parents 5e037bb + f9450b0 commit 6fff66b
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 140 deletions.
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ under the License.
<!-- other -->
<lifecycle-mapping.version>1.0.0</lifecycle-mapping.version>
<git-commit-id-plugin.version>4.9.10</git-commit-id-plugin.version>

<testng.excludedgroups>generate</testng.excludedgroups>
</properties>

<dependencies>
Expand Down Expand Up @@ -308,6 +310,7 @@ under the License.
<useManifestOnlyJar>false</useManifestOnlyJar>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
<reportsDirectory>${project.build.directory}/test-output/${maven.build.timestamp}</reportsDirectory>
<excludedGroups>${testng.excludedgroups}</excludedGroups>
</configuration>
</plugin>

Expand Down
16 changes: 16 additions & 0 deletions src/test/java/org/apache/datasketches/cpc/CpcCBinariesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static org.testng.Assert.assertEquals;

import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;

import org.apache.datasketches.memory.MapHandle;
Expand Down Expand Up @@ -282,6 +283,21 @@ public void genSparseSketch() {
println(sk2.toString(true));
}

/**
 * Writes serialized {@code CpcSketch} images to files named {@code cpc_n<n>.sk}.
 *
 * <p>For each stream length a fresh sketch (constructed with argument 11) is fed the
 * integers {@code 0 .. n-1}. Before the file is written, the sketch's flavor is checked
 * against the flavor expected for that stream length.</p>
 *
 * <p>Belongs to the TestNG group "generate".</p>
 *
 * @throws Exception if a file cannot be opened or written
 */
@Test(groups = {"generate"})
public void generateBinariesForCompatibilityTesting() throws Exception {
  // Parallel arrays: expectedFlavors[k] is the flavor required after streamLengths[k] updates.
  final int[] streamLengths = {0, 100, 200, 2000, 20000};
  final Flavor[] expectedFlavors =
      {Flavor.EMPTY, Flavor.SPARSE, Flavor.HYBRID, Flavor.PINNED, Flavor.SLIDING};

  for (int k = 0; k < streamLengths.length; k++) {
    final int n = streamLengths[k];
    final CpcSketch sk = new CpcSketch(11);
    int item = 0;
    while (item < n) {
      sk.update(item);
      item++;
    }
    assertEquals(sk.getFlavor(), expectedFlavors[k]);

    final String fileName = "cpc_n" + n + ".sk";
    try (FileOutputStream out = new FileOutputStream(fileName)) {
      out.write(sk.toByteArray());
    }
  }
}

@Test
public void printlnTest() {
println("PRINTING: " + this.getClass().getName());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.datasketches.kll;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;

import java.io.File;
import java.io.FileOutputStream;

import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.MapHandle;
import org.apache.datasketches.memory.Memory;
import org.testng.annotations.Test;

/**
 * Serialization and deserialization tests for {@link KllDoublesSketch}.
 *
 * <p>Covers heap round trips through {@code toByteArray()} and {@code heapify()}, reading a
 * sketch image from the {@code kll_double_estimation_cpp.sk} test resource, and a
 * "generate"-group method that writes serialized sketches to files.</p>
 */
public class KllDoublesSketchSerDeTest {

  /**
   * Round-trips an empty sketch and checks that the restored sketch is empty and agrees
   * with the source on retained count, N, rank error and serialized size.
   */
  @Test
  public void serializeDeserializeEmpty() {
    final KllDoublesSketch source = KllDoublesSketch.newHeapInstance();
    final byte[] image = source.toByteArray();
    final KllDoublesSketch restored = KllDoublesSketch.heapify(Memory.wrap(image));

    assertEquals(image.length, source.getSerializedSizeBytes());
    assertTrue(restored.isEmpty());
    assertEquals(restored.getNumRetained(), source.getNumRetained());
    assertEquals(restored.getN(), source.getN());
    assertEquals(restored.getNormalizedRankError(false), source.getNormalizedRankError(false));

    // Asking an empty sketch for its minimum or maximum must raise IllegalArgumentException.
    try {
      restored.getMinItem();
      fail();
    } catch (final IllegalArgumentException expected) {
      // expected
    }
    try {
      restored.getMaxItem();
      fail();
    } catch (final IllegalArgumentException expected) {
      // expected
    }

    assertEquals(restored.getSerializedSizeBytes(), source.getSerializedSizeBytes());
  }

  /**
   * Round-trips a sketch holding the single value 1 and checks its counts, extremes and
   * the expected serialized size of {@code 8 + Double.BYTES} bytes.
   */
  @Test
  public void serializeDeserializeOneValue() {
    final KllDoublesSketch source = KllDoublesSketch.newHeapInstance();
    source.update(1);
    final byte[] image = source.toByteArray();
    final KllDoublesSketch restored = KllDoublesSketch.heapify(Memory.wrap(image));

    assertEquals(image.length, source.getSerializedSizeBytes());
    assertFalse(restored.isEmpty());
    assertEquals(restored.getNumRetained(), 1);
    assertEquals(restored.getN(), 1);
    assertEquals(restored.getNormalizedRankError(false), source.getNormalizedRankError(false));
    assertEquals(restored.getMinItem(), 1.0);
    assertEquals(restored.getMaxItem(), 1.0);
    assertEquals(restored.getSerializedSizeBytes(), 8 + Double.BYTES);
  }

  /**
   * Round-trips a sketch fed the integers 0 through 999 and checks that the restored
   * sketch agrees with the source on every compared property.
   */
  @Test
  public void serializeDeserialize() {
    final KllDoublesSketch source = KllDoublesSketch.newHeapInstance();
    for (int value = 0; value < 1000; value++) {
      source.update(value);
    }
    final byte[] image = source.toByteArray();
    final KllDoublesSketch restored = KllDoublesSketch.heapify(Memory.wrap(image));

    assertEquals(image.length, source.getSerializedSizeBytes());
    assertFalse(restored.isEmpty());
    assertEquals(restored.getNumRetained(), source.getNumRetained());
    assertEquals(restored.getN(), source.getN());
    assertEquals(restored.getNormalizedRankError(false), source.getNormalizedRankError(false));
    assertEquals(restored.getMinItem(), source.getMinItem());
    assertEquals(restored.getMaxItem(), source.getMaxItem());
    assertEquals(restored.getSerializedSizeBytes(), source.getSerializedSizeBytes());
  }

  /**
   * Heapifies the sketch stored in the {@code kll_double_estimation_cpp.sk} resource and
   * checks its minimum (0), maximum (999) and N (1000).
   *
   * @throws Exception if the resource cannot be located or mapped
   */
  @Test
  public void compatibilityWithCppEstimationMode() throws Exception {
    final File resource = Util.getResourceFile("kll_double_estimation_cpp.sk");
    try (MapHandle mapped = Memory.map(resource)) {
      final KllDoublesSketch fromCpp = KllDoublesSketch.heapify(mapped.get());
      assertEquals(fromCpp.getMinItem(), 0);
      assertEquals(fromCpp.getMaxItem(), 999);
      assertEquals(fromCpp.getN(), 1000);
    }
  }

  /**
   * Writes serialized sketches to files named {@code kll_double_n<n>.sk}, one per stream
   * length; each sketch is fed the integers {@code 0 .. n-1}.
   *
   * <p>Belongs to the TestNG group "generate".</p>
   *
   * @throws Exception if a file cannot be opened or written
   */
  @Test(groups = {"generate"})
  public void generateBinariesForCompatibilityTesting() throws Exception {
    final int[] streamLengths = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
    for (int k = 0; k < streamLengths.length; k++) {
      final int n = streamLengths[k];
      final KllDoublesSketch sk = KllDoublesSketch.newHeapInstance();
      for (int i = 0; i < n; i++) {
        sk.update(i);
      }
      final String fileName = "kll_double_n" + n + ".sk";
      try (FileOutputStream out = new FileOutputStream(fileName)) {
        out.write(sk.toByteArray());
      }
    }
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import static org.testng.Assert.fail;

import java.io.File;
import java.io.FileOutputStream;

import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.Util;
Expand Down Expand Up @@ -398,66 +399,6 @@ public void maxK() {
assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256);
}

/**
 * Round-trips an empty sketch through its byte-array form and checks that the restored
 * sketch is empty and matches the source on retained count, N, rank error and compact
 * serialized size.
 */
@Test
public void serializeDeserializeEmpty() {
  final KllDoublesSketch source = KllDoublesSketch.newHeapInstance();
  final byte[] image = source.toByteArray();
  final KllDoublesSketch restored = KllDoublesSketch.heapify(Memory.wrap(image));

  assertEquals(image.length, source.getCurrentCompactSerializedSizeBytes());
  assertTrue(restored.isEmpty());
  assertEquals(restored.getNumRetained(), source.getNumRetained());
  assertEquals(restored.getN(), source.getN());
  assertEquals(restored.getNormalizedRankError(false), source.getNormalizedRankError(false));

  // An empty sketch has no extremes: both getters must raise IllegalArgumentException.
  try {
    restored.getMinItem();
    fail();
  } catch (final IllegalArgumentException expected) {
    // expected
  }
  try {
    restored.getMaxItem();
    fail();
  } catch (final IllegalArgumentException expected) {
    // expected
  }

  assertEquals(restored.getCurrentCompactSerializedSizeBytes(),
      source.getCurrentCompactSerializedSizeBytes());
}

/**
 * Round-trips a sketch holding the single value 1 and checks its counts, extremes and the
 * expected compact serialized size of {@code 8 + Double.BYTES} bytes.
 */
@Test
public void serializeDeserializeOneValue() {
  final KllDoublesSketch source = KllDoublesSketch.newHeapInstance();
  source.update(1);

  final byte[] image = source.toByteArray();
  final KllDoublesSketch restored = KllDoublesSketch.heapify(Memory.wrap(image));

  assertEquals(image.length, source.getSerializedSizeBytes());
  assertFalse(restored.isEmpty());
  assertEquals(restored.getNumRetained(), 1);
  assertEquals(restored.getN(), 1);
  assertEquals(restored.getNormalizedRankError(false), source.getNormalizedRankError(false));
  assertEquals(restored.getMinItem(), 1.0);
  assertEquals(restored.getMaxItem(), 1.0);
  assertEquals(restored.getCurrentCompactSerializedSizeBytes(), 8 + Double.BYTES);
}

//@Test //not implemented from C++ yet
//public void deserializeOneValueV1() throws Exception {
// final byte[] bytes = getResourceBytes("kll_sketch_float_one_value_v1.sk");
// final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(bytes));
// assertFalse(sketch.isEmpty());
// assertFalse(sketch.isEstimationMode());
// assertEquals(sketch.getN(), 1);
// assertEquals(sketch.getNumRetained(), 1);
//}

/**
 * Round-trips a sketch fed the integers 0 through 999 and checks that the restored sketch
 * agrees with the source on every compared property.
 */
@Test
public void serializeDeserialize() {
  final KllDoublesSketch source = KllDoublesSketch.newHeapInstance();
  for (int value = 0; value < 1000; value++) {
    source.update(value);
  }

  final byte[] image = source.toByteArray();
  final KllDoublesSketch restored = KllDoublesSketch.heapify(Memory.wrap(image));

  assertEquals(image.length, source.getCurrentCompactSerializedSizeBytes());
  assertFalse(restored.isEmpty());
  assertEquals(restored.getNumRetained(), source.getNumRetained());
  assertEquals(restored.getN(), source.getN());
  assertEquals(restored.getNormalizedRankError(false), source.getNormalizedRankError(false));
  assertEquals(restored.getMinItem(), source.getMinItem());
  assertEquals(restored.getMaxItem(), source.getMaxItem());
  assertEquals(restored.getCurrentCompactSerializedSizeBytes(),
      source.getCurrentCompactSerializedSizeBytes());
}

@Test(expectedExceptions = SketchesArgumentException.class)
public void outOfOrderSplitPoints() {
final KllDoublesSketch sketch = KllDoublesSketch.newHeapInstance();
Expand Down Expand Up @@ -588,16 +529,6 @@ public void checkCDF_PDF() {
}
}

/**
 * Heapifies the sketch stored in the {@code kll_double_estimation_cpp.sk} resource and
 * checks its minimum (0) and maximum (999).
 *
 * @throws Exception if the resource cannot be located or mapped
 */
@Test
public void compatibilityWithCppEstimationMode() throws Exception {
  final File resource = Util.getResourceFile("kll_double_estimation_cpp.sk");
  try (MapHandle mapped = Memory.map(resource)) {
    final KllDoublesSketch fromCpp = KllDoublesSketch.heapify(mapped.get());
    assertEquals(fromCpp.getMinItem(), 0);
    assertEquals(fromCpp.getMaxItem(), 999);
  }
}

private final static boolean enablePrinting = false;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.datasketches.kll;

import static org.apache.datasketches.common.Util.getResourceBytes;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;

import java.io.File;
import java.io.FileOutputStream;

import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.MapHandle;
import org.apache.datasketches.memory.Memory;
import org.testng.annotations.Test;

/**
 * Serialization and deserialization tests for {@link KllFloatsSketch}.
 *
 * <p>Covers heap round trips through {@code toByteArray()} and {@code heapify()}, reading
 * sketch images from test resources, and a "generate"-group method that writes serialized
 * sketches to files.</p>
 */
public class KllFloatsSketchSerDeTest {

  /**
   * Round-trips an empty sketch and checks that the restored sketch is empty and agrees
   * with the source on retained count, N, rank error and serialized size.
   */
  @Test
  public void serializeDeserializeEmpty() {
    final KllFloatsSketch sketch1 = KllFloatsSketch.newHeapInstance();
    final byte[] bytes = sketch1.toByteArray();
    final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
    assertEquals(bytes.length, sketch1.getSerializedSizeBytes());
    assertTrue(sketch2.isEmpty());
    assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
    assertEquals(sketch2.getN(), sketch1.getN());
    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
    // Asking an empty sketch for its minimum or maximum must raise IllegalArgumentException.
    try {
      sketch2.getMinItem();
      fail();
    } catch (final IllegalArgumentException expected) {
      // expected
    }
    try {
      sketch2.getMaxItem();
      fail();
    } catch (final IllegalArgumentException expected) {
      // expected
    }
    assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes());
  }

  /**
   * Round-trips a sketch holding the single value 1 and checks its counts, extremes and
   * the expected serialized size of {@code 8 + Float.BYTES} bytes.
   */
  @Test
  public void serializeDeserializeOneValue() {
    final KllFloatsSketch sketch1 = KllFloatsSketch.newHeapInstance();
    sketch1.update(1);
    final byte[] bytes = sketch1.toByteArray();
    final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
    assertEquals(bytes.length, sketch1.getSerializedSizeBytes());
    assertFalse(sketch2.isEmpty());
    assertEquals(sketch2.getNumRetained(), 1);
    assertEquals(sketch2.getN(), 1);
    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
    // Pin the actual value, as the doubles counterpart does. A not-NaN check would still
    // pass if the single item came back as some other finite number.
    assertEquals(sketch2.getMinItem(), 1.0F);
    assertEquals(sketch2.getMaxItem(), 1.0F);
    assertEquals(sketch2.getSerializedSizeBytes(), 8 + Float.BYTES);
  }

  /**
   * Heapifies the sketch stored in the {@code kll_sketch_float_one_item_v1.sk} resource
   * and checks that it is non-empty, not in estimation mode, and has N and retained count
   * of 1.
   *
   * @throws Exception if the resource cannot be read
   */
  @Test
  public void deserializeOneValueV1() throws Exception {
    final byte[] bytes = getResourceBytes("kll_sketch_float_one_item_v1.sk");
    final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(bytes));
    assertFalse(sketch.isEmpty());
    assertFalse(sketch.isEstimationMode());
    assertEquals(sketch.getN(), 1);
    assertEquals(sketch.getNumRetained(), 1);
  }

  /**
   * Round-trips a sketch fed the integers 0 through 999 and checks that the restored
   * sketch agrees with the source on every compared property.
   */
  @Test
  public void serializeDeserialize() {
    final KllFloatsSketch sketch1 = KllFloatsSketch.newHeapInstance();
    final int n = 1000;
    for (int i = 0; i < n; i++) {
      sketch1.update(i);
    }
    final byte[] bytes = sketch1.toByteArray();
    final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes));
    assertEquals(bytes.length, sketch1.getSerializedSizeBytes());
    assertFalse(sketch2.isEmpty());
    assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained());
    assertEquals(sketch2.getN(), sketch1.getN());
    assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false));
    assertEquals(sketch2.getMinItem(), sketch1.getMinItem());
    assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem());
    assertEquals(sketch2.getSerializedSizeBytes(), sketch1.getSerializedSizeBytes());
  }

  /**
   * Heapifies the sketch stored in the {@code kll_float_estimation_cpp.sk} resource and
   * checks its minimum (0) and maximum (999).
   *
   * @throws Exception if the resource cannot be located or mapped
   */
  @Test
  public void compatibilityWithCppEstimationMode() throws Exception {
    final File file = Util.getResourceFile("kll_float_estimation_cpp.sk");
    try (final MapHandle mh = Memory.map(file)) {
      final KllFloatsSketch sketch = KllFloatsSketch.heapify(mh.get());
      assertEquals(sketch.getMinItem(), 0);
      assertEquals(sketch.getMaxItem(), 999);
    }
  }

  /**
   * Writes serialized sketches to files named {@code kll_float_n<n>.sk}, one per stream
   * length; each sketch is fed the integers {@code 0 .. n-1}.
   *
   * <p>Belongs to the TestNG group "generate".</p>
   *
   * @throws Exception if a file cannot be opened or written
   */
  @Test(groups = {"generate"})
  public void generateBinariesForCompatibilityTesting() throws Exception {
    final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
    for (final int n : nArr) {
      final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance();
      for (int i = 0; i < n; i++) {
        sketch.update(i);
      }
      try (final FileOutputStream file = new FileOutputStream("kll_float_n" + n + ".sk")) {
        file.write(sketch.toByteArray());
      }
    }
  }

}
Loading

0 comments on commit 6fff66b

Please sign in to comment.