Skip to content

Commit fe7239b

Browse files
icexellosskou
authored and committed
ARROW-411: [Java] Move comparison functions in Integration to a separate Validator module
Author: Li Jin <ice.xelloss@gmail.com> Closes apache#267 from icexelloss/validator and squashes the following commits: b4e86c5 [Li Jin] ARROW-411: Move comparison functions in Integration to a separate Validator module
1 parent 1daa617 commit fe7239b

File tree

4 files changed

+185
-123
lines changed

4 files changed

+185
-123
lines changed

tools/src/main/java/org/apache/arrow/tools/Integration.java

Lines changed: 3 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828

2929
import org.apache.arrow.memory.BufferAllocator;
3030
import org.apache.arrow.memory.RootAllocator;
31-
import org.apache.arrow.vector.FieldVector;
3231
import org.apache.arrow.vector.VectorLoader;
3332
import org.apache.arrow.vector.VectorSchemaRoot;
3433
import org.apache.arrow.vector.VectorUnloader;
@@ -39,10 +38,8 @@
3938
import org.apache.arrow.vector.file.json.JsonFileReader;
4039
import org.apache.arrow.vector.file.json.JsonFileWriter;
4140
import org.apache.arrow.vector.schema.ArrowRecordBatch;
42-
import org.apache.arrow.vector.types.pojo.ArrowType;
43-
import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
44-
import org.apache.arrow.vector.types.pojo.Field;
4541
import org.apache.arrow.vector.types.pojo.Schema;
42+
import org.apache.arrow.vector.util.Validator;
4643
import org.apache.commons.cli.CommandLine;
4744
import org.apache.commons.cli.CommandLineParser;
4845
import org.apache.commons.cli.Options;
@@ -51,8 +48,6 @@
5148
import org.slf4j.Logger;
5249
import org.slf4j.LoggerFactory;
5350

54-
import com.google.common.base.Objects;
55-
5651
public class Integration {
5752
private static final Logger LOGGER = LoggerFactory.getLogger(Integration.class);
5853

@@ -143,7 +138,7 @@ public void execute(File arrowFile, File jsonFile) throws IOException {
143138
LOGGER.debug("ARROW schema: " + arrowSchema);
144139
LOGGER.debug("JSON Input file size: " + jsonFile.length());
145140
LOGGER.debug("JSON schema: " + jsonSchema);
146-
compareSchemas(jsonSchema, arrowSchema);
141+
Validator.compareSchemas(jsonSchema, arrowSchema);
147142

148143
List<ArrowBlock> recordBatches = footer.getRecordBatches();
149144
Iterator<ArrowBlock> iterator = recordBatches.iterator();
@@ -154,8 +149,7 @@ public void execute(File arrowFile, File jsonFile) throws IOException {
154149
VectorSchemaRoot arrowRoot = new VectorSchemaRoot(arrowSchema, allocator);) {
155150
VectorLoader vectorLoader = new VectorLoader(arrowRoot);
156151
vectorLoader.load(inRecordBatch);
157-
// TODO: compare
158-
compare(arrowRoot, jsonRoot);
152+
Validator.compareVectorSchemaRoot(arrowRoot, jsonRoot);
159153
}
160154
jsonRoot.close();
161155
}
@@ -227,86 +221,4 @@ private static void fatalError(String message, Throwable e) {
227221
System.exit(1);
228222
}
229223

230-
231-
private static void compare(VectorSchemaRoot arrowRoot, VectorSchemaRoot jsonRoot) {
232-
compareSchemas(jsonRoot.getSchema(), arrowRoot.getSchema());
233-
if (arrowRoot.getRowCount() != jsonRoot.getRowCount()) {
234-
throw new IllegalArgumentException("Different row count:\n" + arrowRoot.getRowCount() + "\n" + jsonRoot.getRowCount());
235-
}
236-
List<FieldVector> arrowVectors = arrowRoot.getFieldVectors();
237-
List<FieldVector> jsonVectors = jsonRoot.getFieldVectors();
238-
if (arrowVectors.size() != jsonVectors.size()) {
239-
throw new IllegalArgumentException("Different column count:\n" + arrowVectors.size() + "\n" + jsonVectors.size());
240-
}
241-
for (int i = 0; i < arrowVectors.size(); i++) {
242-
Field field = arrowRoot.getSchema().getFields().get(i);
243-
FieldVector arrowVector = arrowVectors.get(i);
244-
FieldVector jsonVector = jsonVectors.get(i);
245-
int valueCount = arrowVector.getAccessor().getValueCount();
246-
if (valueCount != jsonVector.getAccessor().getValueCount()) {
247-
throw new IllegalArgumentException("Different value count for field " + field + " : " + valueCount + " != " + jsonVector.getAccessor().getValueCount());
248-
}
249-
for (int j = 0; j < valueCount; j++) {
250-
Object arrow = arrowVector.getAccessor().getObject(j);
251-
Object json = jsonVector.getAccessor().getObject(j);
252-
if (!equals(field.getType(), arrow, json)) {
253-
throw new IllegalArgumentException(
254-
"Different values in column:\n" + field + " at index " + j + ": " + arrow + " != " + json);
255-
}
256-
}
257-
}
258-
}
259-
260-
private static boolean equals(ArrowType type, final Object arrow, final Object json) {
261-
if (type instanceof ArrowType.FloatingPoint) {
262-
FloatingPoint fpType = (FloatingPoint) type;
263-
switch (fpType.getPrecision()) {
264-
case DOUBLE:
265-
return equalEnough((Double)arrow, (Double)json);
266-
case SINGLE:
267-
return equalEnough((Float)arrow, (Float)json);
268-
case HALF:
269-
default:
270-
throw new UnsupportedOperationException("unsupported precision: " + fpType);
271-
}
272-
}
273-
return Objects.equal(arrow, json);
274-
}
275-
276-
static boolean equalEnough(Float f1, Float f2) {
277-
if (f1 == null || f2 == null) {
278-
return f1 == null && f2 == null;
279-
}
280-
if (f1.isNaN()) {
281-
return f2.isNaN();
282-
}
283-
if (f1.isInfinite()) {
284-
return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
285-
}
286-
float average = Math.abs((f1 + f2) / 2);
287-
float differenceScaled = Math.abs(f1 - f2) / (average == 0.0f ? 1f : average);
288-
return differenceScaled < 1.0E-6f;
289-
}
290-
291-
static boolean equalEnough(Double f1, Double f2) {
292-
if (f1 == null || f2 == null) {
293-
return f1 == null && f2 == null;
294-
}
295-
if (f1.isNaN()) {
296-
return f2.isNaN();
297-
}
298-
if (f1.isInfinite()) {
299-
return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
300-
}
301-
double average = Math.abs((f1 + f2) / 2);
302-
double differenceScaled = Math.abs(f1 - f2) / (average == 0.0d ? 1d : average);
303-
return differenceScaled < 1.0E-12d;
304-
}
305-
306-
307-
private static void compareSchemas(Schema jsonSchema, Schema arrowSchema) {
308-
if (!arrowSchema.equals(jsonSchema)) {
309-
throw new IllegalArgumentException("Different schemas:\n" + arrowSchema + "\n" + jsonSchema);
310-
}
311-
}
312224
}

tools/src/test/java/org/apache/arrow/tools/TestIntegration.java

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@
2222
import static org.apache.arrow.tools.ArrowFileTestFixtures.write;
2323
import static org.apache.arrow.tools.ArrowFileTestFixtures.writeData;
2424
import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput;
25-
import static org.apache.arrow.tools.Integration.equalEnough;
2625
import static org.junit.Assert.assertEquals;
27-
import static org.junit.Assert.assertFalse;
2826
import static org.junit.Assert.assertTrue;
2927
import static org.junit.Assert.fail;
3028

@@ -238,34 +236,4 @@ static void writeInput2(File testInFile, BufferAllocator allocator) throws FileN
238236
write(parent.getChild("root"), testInFile);
239237
}
240238
}
241-
242-
@Test
243-
public void testFloatComp() {
244-
assertTrue(equalEnough(912.4140000000002F, 912.414F));
245-
assertTrue(equalEnough(912.4140000000002D, 912.414D));
246-
assertTrue(equalEnough(912.414F, 912.4140000000002F));
247-
assertTrue(equalEnough(912.414D, 912.4140000000002D));
248-
assertFalse(equalEnough(912.414D, 912.4140001D));
249-
assertFalse(equalEnough(null, 912.414D));
250-
assertTrue(equalEnough((Float)null, null));
251-
assertTrue(equalEnough((Double)null, null));
252-
assertFalse(equalEnough(912.414D, null));
253-
assertFalse(equalEnough(Double.MAX_VALUE, Double.MIN_VALUE));
254-
assertFalse(equalEnough(Double.MIN_VALUE, Double.MAX_VALUE));
255-
assertTrue(equalEnough(Double.MAX_VALUE, Double.MAX_VALUE));
256-
assertTrue(equalEnough(Double.MIN_VALUE, Double.MIN_VALUE));
257-
assertTrue(equalEnough(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
258-
assertFalse(equalEnough(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
259-
assertTrue(equalEnough(Double.NaN, Double.NaN));
260-
assertFalse(equalEnough(1.0, Double.NaN));
261-
assertFalse(equalEnough(Float.MAX_VALUE, Float.MIN_VALUE));
262-
assertFalse(equalEnough(Float.MIN_VALUE, Float.MAX_VALUE));
263-
assertTrue(equalEnough(Float.MAX_VALUE, Float.MAX_VALUE));
264-
assertTrue(equalEnough(Float.MIN_VALUE, Float.MIN_VALUE));
265-
assertTrue(equalEnough(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY));
266-
assertFalse(equalEnough(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
267-
assertTrue(equalEnough(Float.NaN, Float.NaN));
268-
assertFalse(equalEnough(1.0F, Float.NaN));
269-
}
270-
271239
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.arrow.vector.util;
19+
20+
import java.util.List;
21+
22+
import org.apache.arrow.vector.FieldVector;
23+
import org.apache.arrow.vector.VectorSchemaRoot;
24+
import org.apache.arrow.vector.types.pojo.ArrowType;
25+
import org.apache.arrow.vector.types.pojo.Field;
26+
import org.apache.arrow.vector.types.pojo.Schema;
27+
28+
import com.google.common.base.Objects;
29+
30+
/**
31+
* Utility class for validating arrow data structures
32+
*/
33+
public class Validator {
34+
35+
/**
36+
* Validate two arrow schemas are equal.
37+
*
38+
* @throws IllegalArgumentException if they are different.
39+
*/
40+
public static void compareSchemas(Schema schema1, Schema schema2) {
41+
if (!schema2.equals(schema1)) {
42+
throw new IllegalArgumentException("Different schemas:\n" + schema2 + "\n" + schema1);
43+
}
44+
}
45+
46+
/**
47+
* Validate two arrow vectorSchemaRoot are equal.
48+
*
49+
* @throws IllegalArgumentException if they are different.
50+
*/
51+
public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) {
52+
compareSchemas(root2.getSchema(), root1.getSchema());
53+
if (root1.getRowCount() != root2.getRowCount()) {
54+
throw new IllegalArgumentException("Different row count:\n" + root1.getRowCount() + "\n" + root2.getRowCount());
55+
}
56+
List<FieldVector> arrowVectors = root1.getFieldVectors();
57+
List<FieldVector> jsonVectors = root2.getFieldVectors();
58+
if (arrowVectors.size() != jsonVectors.size()) {
59+
throw new IllegalArgumentException("Different column count:\n" + arrowVectors.size() + "\n" + jsonVectors.size());
60+
}
61+
for (int i = 0; i < arrowVectors.size(); i++) {
62+
Field field = root1.getSchema().getFields().get(i);
63+
FieldVector arrowVector = arrowVectors.get(i);
64+
FieldVector jsonVector = jsonVectors.get(i);
65+
int valueCount = arrowVector.getAccessor().getValueCount();
66+
if (valueCount != jsonVector.getAccessor().getValueCount()) {
67+
throw new IllegalArgumentException("Different value count for field " + field + " : " + valueCount + " != " + jsonVector.getAccessor().getValueCount());
68+
}
69+
for (int j = 0; j < valueCount; j++) {
70+
Object arrow = arrowVector.getAccessor().getObject(j);
71+
Object json = jsonVector.getAccessor().getObject(j);
72+
if (!equals(field.getType(), arrow, json)) {
73+
throw new IllegalArgumentException(
74+
"Different values in column:\n" + field + " at index " + j + ": " + arrow + " != " + json);
75+
}
76+
}
77+
}
78+
}
79+
80+
static boolean equals(ArrowType type, final Object o1, final Object o2) {
81+
if (type instanceof ArrowType.FloatingPoint) {
82+
ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type;
83+
switch (fpType.getPrecision()) {
84+
case DOUBLE:
85+
return equalEnough((Double)o1, (Double)o2);
86+
case SINGLE:
87+
return equalEnough((Float)o1, (Float)o2);
88+
case HALF:
89+
default:
90+
throw new UnsupportedOperationException("unsupported precision: " + fpType);
91+
}
92+
}
93+
return Objects.equal(o1, o2);
94+
}
95+
96+
static boolean equalEnough(Float f1, Float f2) {
97+
if (f1 == null || f2 == null) {
98+
return f1 == null && f2 == null;
99+
}
100+
if (f1.isNaN()) {
101+
return f2.isNaN();
102+
}
103+
if (f1.isInfinite()) {
104+
return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
105+
}
106+
float average = Math.abs((f1 + f2) / 2);
107+
float differenceScaled = Math.abs(f1 - f2) / (average == 0.0f ? 1f : average);
108+
return differenceScaled < 1.0E-6f;
109+
}
110+
111+
static boolean equalEnough(Double f1, Double f2) {
112+
if (f1 == null || f2 == null) {
113+
return f1 == null && f2 == null;
114+
}
115+
if (f1.isNaN()) {
116+
return f2.isNaN();
117+
}
118+
if (f1.isInfinite()) {
119+
return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
120+
}
121+
double average = Math.abs((f1 + f2) / 2);
122+
double differenceScaled = Math.abs(f1 - f2) / (average == 0.0d ? 1d : average);
123+
return differenceScaled < 1.0E-12d;
124+
}
125+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.arrow.vector.util;
20+
21+
import static org.apache.arrow.vector.util.Validator.equalEnough;
22+
import static org.junit.Assert.assertFalse;
23+
import static org.junit.Assert.assertTrue;
24+
25+
import org.junit.Test;
26+
27+
public class TestValidator {
28+
29+
@Test
30+
public void testFloatComp() {
31+
assertTrue(equalEnough(912.4140000000002F, 912.414F));
32+
assertTrue(equalEnough(912.4140000000002D, 912.414D));
33+
assertTrue(equalEnough(912.414F, 912.4140000000002F));
34+
assertTrue(equalEnough(912.414D, 912.4140000000002D));
35+
assertFalse(equalEnough(912.414D, 912.4140001D));
36+
assertFalse(equalEnough(null, 912.414D));
37+
assertTrue(equalEnough((Float)null, null));
38+
assertTrue(equalEnough((Double)null, null));
39+
assertFalse(equalEnough(912.414D, null));
40+
assertFalse(equalEnough(Double.MAX_VALUE, Double.MIN_VALUE));
41+
assertFalse(equalEnough(Double.MIN_VALUE, Double.MAX_VALUE));
42+
assertTrue(equalEnough(Double.MAX_VALUE, Double.MAX_VALUE));
43+
assertTrue(equalEnough(Double.MIN_VALUE, Double.MIN_VALUE));
44+
assertTrue(equalEnough(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
45+
assertFalse(equalEnough(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
46+
assertTrue(equalEnough(Double.NaN, Double.NaN));
47+
assertFalse(equalEnough(1.0, Double.NaN));
48+
assertFalse(equalEnough(Float.MAX_VALUE, Float.MIN_VALUE));
49+
assertFalse(equalEnough(Float.MIN_VALUE, Float.MAX_VALUE));
50+
assertTrue(equalEnough(Float.MAX_VALUE, Float.MAX_VALUE));
51+
assertTrue(equalEnough(Float.MIN_VALUE, Float.MIN_VALUE));
52+
assertTrue(equalEnough(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY));
53+
assertFalse(equalEnough(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
54+
assertTrue(equalEnough(Float.NaN, Float.NaN));
55+
assertFalse(equalEnough(1.0F, Float.NaN));
56+
}
57+
}

0 commit comments

Comments
 (0)