Skip to content

Commit a3c2680

Browse files
zhztheplayer authored and zhouyuan committed
Add csv support (apache#9)
1 parent 608ee97 commit a3c2680

File tree

4 files changed

+54
-2
lines changed

4 files changed

+54
-2
lines changed

cpp/src/jni/dataset/jni_wrapper.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,8 @@ std::shared_ptr<arrow::dataset::FileFormat> GetFileFormat(JNIEnv *env, jint id)
196196
switch (id) {
197197
case 0:
198198
return std::make_shared<arrow::dataset::ParquetFileFormat>();
199+
case 1:
200+
return std::make_shared<arrow::dataset::CsvFileFormat>();
199201
default:
200202
std::string error_message = "illegal file format id: " + std::to_string(id);
201203
env->ThrowNew(illegal_argument_exception_class, error_message.c_str());

java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
* File format definitions.
2222
*/
2323
public enum FileFormat {
24-
PARQUET(0);
24+
PARQUET(0),
25+
CSV(1),
26+
NONE(-1);
2527

2628
private int id;
2729

java/dataset/src/test/java/org/apache/arrow/dataset/jni/NativeDatasetTest.java

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,26 @@
3939
import org.apache.arrow.dataset.source.DatasetFactory;
4040
import org.apache.arrow.memory.ReservationListener;
4141
import org.apache.arrow.memory.RootAllocator;
42+
import org.apache.arrow.util.AutoCloseables;
4243
import org.apache.arrow.vector.VectorSchemaRoot;
4344
import org.apache.arrow.vector.dictionary.Dictionary;
45+
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
46+
import org.apache.arrow.vector.types.Types;
4447
import org.apache.arrow.vector.types.pojo.Schema;
4548
import org.junit.Assert;
4649
import org.junit.Ignore;
4750
import org.junit.Test;
4851

52+
import static org.junit.Assert.assertEquals;
53+
4954
public class NativeDatasetTest {
5055

5156
private String sampleParquetLocal() {
52-
return "file://" + NativeDatasetTest.class.getResource(File.separator + "userdata1.parquet").getPath();
57+
return "file://" + resourcePath("userdata1.parquet");
58+
}
59+
60+
private String resourcePath(String resource) {
61+
return NativeDatasetTest.class.getResource(File.separator + resource).getPath();
5362
}
5463

5564
private void testDatasetFactoryEndToEnd(DatasetFactory factory, int taskCount, int vectorCount, int rowCount) {
@@ -319,6 +328,42 @@ public void testScannerWithEmptyProjector() {
319328
allocator.close();
320329
}
321330

331+
@Test
332+
public void testCsvRead() throws Exception {
333+
RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
334+
SingleFileDatasetFactory factory = new SingleFileDatasetFactory(allocator,
335+
NativeMemoryPool.getDefault(), FileFormat.CSV, "file://" + resourcePath("data/people.csv"));
336+
ScanOptions options = new ScanOptions(new String[]{}, Filter.EMPTY, 100);
337+
Schema schema = factory.inspect();
338+
NativeDataset dataset = factory.finish(schema);
339+
NativeScanner nativeScanner = dataset.newScan(options);
340+
List<? extends ScanTask> scanTasks = collect(nativeScanner.scan());
341+
Assert.assertEquals(1, scanTasks.size());
342+
ScanTask scanTask = scanTasks.get(0);
343+
ScanTask.Itr itr = scanTask.scan();
344+
345+
VectorSchemaRoot vsr = null;
346+
int rowCount = 0;
347+
while (itr.hasNext()) {
348+
// FIXME VectorSchemaRoot is not actually something ITERABLE. Using a reader convention instead.
349+
vsr = itr.next().valueVectors;
350+
rowCount += vsr.getRowCount();
351+
352+
// check if projector is applied
353+
Assert.assertEquals("Schema<name: Utf8, age: Int(64, true), job: Utf8>",
354+
vsr.getSchema().toString());
355+
}
356+
Assert.assertEquals(2, rowCount);
357+
assertEquals(3, schema.getFields().size());
358+
assertEquals("name", schema.getFields().get(0).getName());
359+
assertEquals("age", schema.getFields().get(1).getName());
360+
assertEquals("job", schema.getFields().get(2).getName());
361+
if (vsr != null) {
362+
vsr.close();
363+
}
364+
allocator.close();
365+
}
366+
322367
@Ignore
323368
public void testFilter() {
324369
// todo
java/dataset/src/test/resources/data/people.csv

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
name,age,job
2+
Jorge,30,Developer
3+
Bob,32,Developer

0 commit comments

Comments
 (0)