Skip to content

Commit dc402cc

Browse files
authored
Merge pull request #714 from proost/test-cross-language-test-for-reservoir-sampling-sketch
test: cross language test cases for reservoir sampling sketch
2 parents f1ac363 + f406b06 commit dc402cc

File tree

1 file changed

+226
-0
lines changed

1 file changed

+226
-0
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.datasketches.sampling;
21+
22+
import org.apache.datasketches.common.ArrayOfDoublesSerDe;
23+
import org.apache.datasketches.common.ArrayOfLongsSerDe;
24+
import org.apache.datasketches.common.ArrayOfStringsSerDe;
25+
import org.testng.annotations.Test;
26+
27+
import java.io.IOException;
28+
import java.nio.file.Files;
29+
30+
import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
31+
import static org.apache.datasketches.common.TestUtil.javaPath;
32+
33+
/**
34+
* Serialize binary sketches to be tested by other language code.
35+
* Test deserialization of binary sketches serialized by other language code.
36+
*/
37+
public class ReservoirCrossLanguageTest {
38+
39+
@Test(groups = {GENERATE_JAVA_FILES})
40+
public void generateReservoirLongsSketchEmpty() throws IOException {
41+
final int k = 128;
42+
final ReservoirLongsSketch sk = ReservoirLongsSketch.newInstance(k);
43+
44+
Files.newOutputStream(javaPath.resolve("reservoir_longs_empty_k" + k + "_java.sk"))
45+
.write(sk.toByteArray());
46+
}
47+
48+
@Test(groups = {GENERATE_JAVA_FILES})
49+
public void generateReservoirLongsSketchExact() throws IOException {
50+
final int k = 128;
51+
final int[] nArr = {1, 10, 32, 100, 128};
52+
53+
for (final int n : nArr) {
54+
final ReservoirLongsSketch sk = ReservoirLongsSketch.newInstance(k);
55+
for (int i = 0; i < n; i++) {
56+
sk.update(i);
57+
}
58+
Files.newOutputStream(javaPath.resolve("reservoir_longs_exact_n" + n + "_k" + k + "_java.sk"))
59+
.write(sk.toByteArray());
60+
}
61+
}
62+
63+
@Test(groups = {GENERATE_JAVA_FILES})
64+
public void generateReservoirLongsSketchSampling() throws IOException {
65+
final int[] kArr = {32, 64, 128};
66+
final long n = 1000;
67+
68+
for (final int k : kArr) {
69+
final long[] predeterminedSamples = new long[k];
70+
for (int i = 0; i < k; i++) {
71+
predeterminedSamples[i] = i * 2;
72+
}
73+
74+
final ReservoirLongsSketch sk = ReservoirLongsSketch.getInstance(
75+
predeterminedSamples,
76+
n,
77+
org.apache.datasketches.common.ResizeFactor.X8,
78+
k
79+
);
80+
81+
Files.newOutputStream(javaPath.resolve("reservoir_longs_sampling_n" + n + "_k" + k + "_java.sk"))
82+
.write(sk.toByteArray());
83+
}
84+
}
85+
86+
@Test(groups = {GENERATE_JAVA_FILES})
87+
public void generateReservoirItemsSketchLongEmpty() throws IOException {
88+
final int k = 128;
89+
final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(k);
90+
91+
Files.newOutputStream(javaPath.resolve("reservoir_items_long_empty_k" + k + "_java.sk"))
92+
.write(sk.toByteArray(new ArrayOfLongsSerDe()));
93+
}
94+
95+
@Test(groups = {GENERATE_JAVA_FILES})
96+
public void generateReservoirItemsSketchLongExact() throws IOException {
97+
final int k = 128;
98+
final int[] nArr = {1, 10, 32, 100, 128};
99+
100+
for (final int n : nArr) {
101+
final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(k);
102+
for (int i = 0; i < n; i++) {
103+
sk.update((long) i);
104+
}
105+
Files.newOutputStream(javaPath.resolve("reservoir_items_long_exact_n" + n + "_k" + k + "_java.sk"))
106+
.write(sk.toByteArray(new ArrayOfLongsSerDe()));
107+
}
108+
}
109+
110+
@Test(groups = {GENERATE_JAVA_FILES})
111+
public void generateReservoirItemsSketchLongSampling() throws IOException {
112+
final int[] kArr = {32, 64, 128};
113+
final long n = 1000;
114+
115+
for (final int k : kArr) {
116+
final java.util.ArrayList<Long> predeterminedSamples = new java.util.ArrayList<>();
117+
for (int i = 0; i < k; i++) {
118+
predeterminedSamples.add((long) (i * 2));
119+
}
120+
121+
final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(
122+
predeterminedSamples,
123+
n,
124+
org.apache.datasketches.common.ResizeFactor.X8,
125+
k
126+
);
127+
128+
Files.newOutputStream(javaPath.resolve("reservoir_items_long_sampling_n" + n + "_k" + k + "_java.sk"))
129+
.write(sk.toByteArray(new ArrayOfLongsSerDe()));
130+
}
131+
}
132+
133+
@Test(groups = {GENERATE_JAVA_FILES})
134+
public void generateReservoirItemsSketchDoubleEmpty() throws IOException {
135+
final int k = 128;
136+
final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(k);
137+
138+
Files.newOutputStream(javaPath.resolve("reservoir_items_double_empty_k" + k + "_java.sk"))
139+
.write(sk.toByteArray(new ArrayOfDoublesSerDe()));
140+
}
141+
142+
@Test(groups = {GENERATE_JAVA_FILES})
143+
public void generateReservoirItemsSketchDoubleExact() throws IOException {
144+
final int k = 128;
145+
final int[] nArr = {1, 10, 32, 100, 128};
146+
147+
for (final int n : nArr) {
148+
final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(k);
149+
for (int i = 0; i < n; i++) {
150+
sk.update((double) i);
151+
}
152+
Files.newOutputStream(javaPath.resolve("reservoir_items_double_exact_n" + n + "_k" + k + "_java.sk"))
153+
.write(sk.toByteArray(new ArrayOfDoublesSerDe()));
154+
}
155+
}
156+
157+
@Test(groups = {GENERATE_JAVA_FILES})
158+
public void generateReservoirItemsSketchDoubleSampling() throws IOException {
159+
final int[] kArr = {32, 64, 128};
160+
final long n = 1000;
161+
162+
for (final int k : kArr) {
163+
final java.util.ArrayList<Double> predeterminedSamples = new java.util.ArrayList<>();
164+
for (int i = 0; i < k; i++) {
165+
predeterminedSamples.add((double) (i * 2));
166+
}
167+
168+
final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(
169+
predeterminedSamples,
170+
n,
171+
org.apache.datasketches.common.ResizeFactor.X8,
172+
k
173+
);
174+
175+
Files.newOutputStream(javaPath.resolve("reservoir_items_double_sampling_n" + n + "_k" + k + "_java.sk"))
176+
.write(sk.toByteArray(new ArrayOfDoublesSerDe()));
177+
}
178+
}
179+
180+
@Test(groups = {GENERATE_JAVA_FILES})
181+
public void generateReservoirItemsSketchStringEmpty() throws IOException {
182+
final int k = 128;
183+
final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(k);
184+
185+
Files.newOutputStream(javaPath.resolve("reservoir_items_string_empty_k" + k + "_java.sk"))
186+
.write(sk.toByteArray(new ArrayOfStringsSerDe()));
187+
}
188+
189+
@Test(groups = {GENERATE_JAVA_FILES})
190+
public void generateReservoirItemsSketchStringExact() throws IOException {
191+
final int k = 128;
192+
final int[] nArr = {1, 10, 32, 100, 128};
193+
194+
for (final int n : nArr) {
195+
final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(k);
196+
for (int i = 0; i < n; i++) {
197+
sk.update("item" + i);
198+
}
199+
Files.newOutputStream(javaPath.resolve("reservoir_items_string_exact_n" + n + "_k" + k + "_java.sk"))
200+
.write(sk.toByteArray(new ArrayOfStringsSerDe()));
201+
}
202+
}
203+
204+
@Test(groups = {GENERATE_JAVA_FILES})
205+
public void generateReservoirItemsSketchStringSampling() throws IOException {
206+
final int[] kArr = {32, 64, 128};
207+
final long n = 1000;
208+
209+
for (final int k : kArr) {
210+
final java.util.ArrayList<String> predeterminedSamples = new java.util.ArrayList<>();
211+
for (int i = 0; i < k; i++) {
212+
predeterminedSamples.add("item" + (i * 2));
213+
}
214+
215+
final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(
216+
predeterminedSamples,
217+
n,
218+
org.apache.datasketches.common.ResizeFactor.X8,
219+
k
220+
);
221+
222+
Files.newOutputStream(javaPath.resolve("reservoir_items_string_sampling_n" + n + "_k" + k + "_java.sk"))
223+
.write(sk.toByteArray(new ArrayOfStringsSerDe()));
224+
}
225+
}
226+
}

0 commit comments

Comments
 (0)