Skip to content

Commit a04091f

Browse files
committed
Drafting file round trip helper executable
Change-Id: I7e09c9144f373373a081ff4e0aaf687eca4ed176
1 parent 3d2e4df commit a04091f

File tree

4 files changed

+173
-17
lines changed

4 files changed

+173
-17
lines changed

java/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,5 +467,6 @@
467467
<module>format</module>
468468
<module>memory</module>
469469
<module>vector</module>
470+
<module>tools</module>
470471
</modules>
471472
</project>

java/tools/pom.xml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?xml version="1.0"?>
2+
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
3+
license agreements. See the NOTICE file distributed with this work for additional
4+
information regarding copyright ownership. The ASF licenses this file to
5+
You under the Apache License, Version 2.0 (the "License"); you may not use
6+
this file except in compliance with the License. You may obtain a copy of
7+
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
8+
by applicable law or agreed to in writing, software distributed under the
9+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
10+
OF ANY KIND, either express or implied. See the License for the specific
11+
language governing permissions and limitations under the License. -->
12+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
13+
<modelVersion>4.0.0</modelVersion>
14+
<parent>
15+
<groupId>org.apache.arrow</groupId>
16+
<artifactId>arrow-java-root</artifactId>
17+
<version>0.1.1-SNAPSHOT</version>
18+
</parent>
19+
<artifactId>arrow-tools</artifactId>
20+
<name>Arrow Tools</name>
21+
22+
<dependencies>
23+
<dependency>
24+
<groupId>org.apache.arrow</groupId>
25+
<artifactId>arrow-format</artifactId>
26+
<version>${project.version}</version>
27+
</dependency>
28+
<dependency>
29+
<groupId>org.apache.arrow</groupId>
30+
<artifactId>arrow-memory</artifactId>
31+
<version>${project.version}</version>
32+
</dependency>
33+
<dependency>
34+
<groupId>org.apache.arrow</groupId>
35+
<artifactId>arrow-vector</artifactId>
36+
<version>${project.version}</version>
37+
</dependency>
38+
<dependency>
39+
<groupId>org.apache.commons</groupId>
40+
<artifactId>commons-lang3</artifactId>
41+
<version>3.4</version>
42+
</dependency>
43+
<dependency>
44+
<groupId>commons-cli</groupId>
45+
<artifactId>commons-cli</artifactId>
46+
<version>1.2</version>
47+
</dependency>
48+
</dependencies>
49+
50+
<build>
51+
</build>
52+
53+
54+
55+
</project>

java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.arrow.tools;
20+
21+
import org.apache.arrow.memory.BufferAllocator;
22+
import org.apache.arrow.memory.RootAllocator;
23+
import org.apache.arrow.vector.VectorLoader;
24+
import org.apache.arrow.vector.VectorUnloader;
25+
import org.apache.arrow.vector.complex.NullableMapVector;
26+
import org.apache.arrow.vector.file.ArrowBlock;
27+
import org.apache.arrow.vector.file.ArrowFooter;
28+
import org.apache.arrow.vector.file.ArrowReader;
29+
import org.apache.arrow.vector.file.ArrowWriter;
30+
import org.apache.arrow.vector.schema.ArrowRecordBatch;
31+
import org.apache.arrow.vector.types.Types;
32+
import org.apache.arrow.vector.types.pojo.Schema;
33+
import org.apache.commons.cli.CommandLine;
34+
import org.apache.commons.cli.CommandLineParser;
35+
import org.apache.commons.cli.Option;
36+
import org.apache.commons.cli.OptionBuilder;
37+
import org.apache.commons.cli.Options;
38+
import org.apache.commons.cli.PosixParser;
39+
import org.slf4j.Logger;
40+
import org.slf4j.LoggerFactory;
41+
42+
import java.io.File;
43+
import java.io.FileInputStream;
44+
import java.io.FileOutputStream;
45+
import java.util.Arrays;
46+
import java.util.List;
47+
48+
public class FileRoundtrip {
49+
private static final Logger LOGGER = LoggerFactory.getLogger(FileRoundtrip.class);
50+
public static final Options OPTIONS;
51+
52+
static {
53+
OPTIONS = new Options();
54+
}
55+
56+
public static void main(String[] args) {
57+
try {
58+
String[] cargs = Arrays.copyOfRange(args, 1, args.length);
59+
60+
CommandLineParser parser = new PosixParser();
61+
CommandLine cmd = parser.parse(OPTIONS, cargs, false);
62+
63+
String[] parsed_args = cmd.getArgs();
64+
65+
String inFileName = parsed_args[0];
66+
String outFileName = parsed_args[1];
67+
68+
BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
69+
70+
File inFile = new File(inFileName);
71+
FileInputStream fileInputStream = new FileInputStream(inFile);
72+
ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), allocator);
73+
74+
ArrowFooter footer = arrowReader.readFooter();
75+
Schema schema = footer.getSchema();
76+
LOGGER.debug("Found schema: " + schema);
77+
78+
File outFile = new File(outFileName);
79+
FileOutputStream fileOutputStream = new FileOutputStream(outFile);
80+
ArrowWriter arrowWriter = new ArrowWriter(fileOutputStream.getChannel(), schema);
81+
82+
// initialize vectors
83+
84+
List<ArrowBlock> recordBatches = footer.getRecordBatches();
85+
for (ArrowBlock rbBlock : recordBatches) {
86+
ArrowRecordBatch inRecordBatch = arrowReader.readRecordBatch(rbBlock);
87+
88+
NullableMapVector inParent = new NullableMapVector("parent", allocator, null);
89+
NullableMapVector root = inParent.addOrGet("root", Types.MinorType.MAP, NullableMapVector.class);
90+
VectorLoader vectorLoader = new VectorLoader(schema, root);
91+
vectorLoader.load(inRecordBatch);
92+
93+
NullableMapVector outParent = new NullableMapVector("parent", allocator, null);
94+
VectorUnloader vectorUnloader = new VectorUnloader(outParent);
95+
ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
96+
arrowWriter.writeRecordBatch(recordBatch);
97+
}
98+
} catch (Throwable th) {
99+
System.err.println(th.getMessage());
100+
}
101+
}
102+
}

java/vector/pom.xml

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
<?xml version="1.0"?>
2-
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
3-
license agreements. See the NOTICE file distributed with this work for additional
4-
information regarding copyright ownership. The ASF licenses this file to
5-
You under the Apache License, Version 2.0 (the "License"); you may not use
6-
this file except in compliance with the License. You may obtain a copy of
7-
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
8-
by applicable law or agreed to in writing, software distributed under the
9-
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
10-
OF ANY KIND, either express or implied. See the License for the specific
2+
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
3+
license agreements. See the NOTICE file distributed with this work for additional
4+
information regarding copyright ownership. The ASF licenses this file to
5+
You under the Apache License, Version 2.0 (the "License"); you may not use
6+
this file except in compliance with the License. You may obtain a copy of
7+
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
8+
by applicable law or agreed to in writing, software distributed under the
9+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
10+
OF ANY KIND, either express or implied. See the License for the specific
1111
language governing permissions and limitations under the License. -->
1212
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
1313
<modelVersion>4.0.0</modelVersion>
@@ -56,8 +56,6 @@
5656
<artifactId>commons-lang3</artifactId>
5757
<version>3.4</version>
5858
</dependency>
59-
60-
6159
</dependencies>
6260

6361
<pluginRepositories>
@@ -72,13 +70,13 @@
7270
<enabled>false</enabled>
7371
</snapshots>
7472
</pluginRepository>
75-
</pluginRepositories>
76-
73+
</pluginRepositories>
74+
7775
<build>
7876

7977
<resources>
8078
<resource>
81-
<!-- Copy freemarker template and fmpp configuration files of Vector's
79+
<!-- Copy freemarker template and fmpp configuration files of Vector's
8280
to allow clients to leverage definitions. -->
8381
<directory>${basedir}/src/main/codegen</directory>
8482
<targetPath>codegen</targetPath>
@@ -129,7 +127,7 @@
129127
</plugins>
130128
<pluginManagement>
131129
<plugins>
132-
<!--This plugin's configuration is used to store Eclipse m2e settings
130+
<!--This plugin's configuration is used to store Eclipse m2e settings
133131
only. It has no influence on the Maven build itself. -->
134132
<plugin>
135133
<groupId>org.eclipse.m2e</groupId>
@@ -160,8 +158,8 @@
160158
</plugin>
161159
</plugins>
162160
</pluginManagement>
163-
164-
161+
162+
165163
</build>
166164

167165

0 commit comments

Comments
 (0)