Skip to content

Commit ed82ced

Browse files
julienledem authored and kou committed
ARROW-257: Add a typeids Vector to Union type
Author: Julien Le Dem <julien@dremio.com>

Closes apache#143 from julienledem/union and squashes the following commits:

cd1b711 [Julien Le Dem] ARROW-257: Add a typeids Vector to Union type
1 parent 9c1173d commit ed82ced

File tree

5 files changed

+40
-14
lines changed

5 files changed

+40
-14
lines changed

vector/src/main/codegen/data/ArrowTypes.tdd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
},
3131
{
3232
name: "Union",
33-
fields: [{name: "mode", type: short}]
33+
fields: [{name: "mode", type: short}, {name: "typeIds", type: "int[]"}]
3434
},
3535
{
3636
name: "Int",

vector/src/main/codegen/templates/ArrowType.java

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,23 @@
3333

3434
import java.util.Objects;
3535

36+
/**
37+
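* Base class of the Arrow types; the concrete type classes are generated as nested subclasses from the template's type list.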
38+
**/
3639
public abstract class ArrowType {
3740

3841
public abstract byte getTypeType();
3942
public abstract int getType(FlatBufferBuilder builder);
4043
public abstract <T> T accept(ArrowTypeVisitor<T> visitor);
4144

45+
/**
46+
* Visitor over the concrete ArrowType subclasses, with one visit method per type. Usage:
47+
* <code>
48+
* type.accept(new ArrowTypeVisitor<Type>() {
49+
* ...
50+
* });
51+
* </code>
52+
*/
4253
public static interface ArrowTypeVisitor<T> {
4354
<#list arrowTypes.types as type>
4455
T visit(${type.name} type);
@@ -55,9 +66,7 @@ public static class ${name} extends ArrowType {
5566
</#if>
5667

5768
<#list fields as field>
58-
<#assign fieldName = field.name>
59-
<#assign fieldType = field.type>
60-
${fieldType} ${fieldName};
69+
${field.type} ${field.name};
6170
</#list>
6271

6372
<#if type.fields?size != 0>
@@ -79,6 +88,9 @@ public int getType(FlatBufferBuilder builder) {
7988
<#if field.type == "String">
8089
int ${field.name} = builder.createString(this.${field.name});
8190
</#if>
91+
<#if field.type == "int[]">
92+
int ${field.name} = org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name});
93+
</#if>
8294
</#list>
8395
org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder);
8496
<#list type.fields as field>
@@ -96,7 +108,7 @@ public int getType(FlatBufferBuilder builder) {
96108
public String toString() {
97109
return "${name}{"
98110
<#list fields as field>
99-
+ ", " + ${field.name}
111+
+ <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}</#if><#if field_has_next> + ", " </#if>
100112
</#list>
101113
+ "}";
102114
}
@@ -115,8 +127,7 @@ public boolean equals(Object obj) {
115127
return true;
116128
<#else>
117129
${type.name} that = (${type.name}) obj;
118-
return
119-
<#list type.fields as field>Objects.equals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if>
130+
return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if>
120131
</#list>
121132
</#if>
122133
}
@@ -134,9 +145,20 @@ public static org.apache.arrow.vector.types.pojo.ArrowType getTypeForField(org.a
134145
<#assign name = type.name>
135146
<#assign nameLower = type.name?lower_case>
136147
<#assign fields = type.fields>
137-
case Type.${type.name}:
148+
case Type.${type.name}: {
138149
org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = (org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}());
139-
return new ${type.name}(<#list type.fields as field>${nameLower}Type.${field.name}()<#if field_has_next>, </#if></#list>);
150+
<#list type.fields as field>
151+
<#if field.type == "int[]">
152+
${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()];
153+
for (int i = 0; i< ${field.name}.length; ++i) {
154+
${field.name}[i] = ${nameLower}Type.${field.name}(i);
155+
}
156+
<#else>
157+
${field.type} ${field.name} = ${nameLower}Type.${field.name}();
158+
</#if>
159+
</#list>
160+
return new ${type.name}(<#list type.fields as field>${field.name}<#if field_has_next>, </#if></#list>);
161+
}
140162
</#list>
141163
default:
142164
throw new UnsupportedOperationException("Unsupported type: " + field.typeType());

vector/src/main/codegen/templates/UnionVector.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,10 +232,13 @@ public void clear() {
232232
@Override
233233
public Field getField() {
234234
List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>();
235-
for (ValueVector v : internalMap.getChildren()) {
235+
List<FieldVector> children = internalMap.getChildren();
236+
int[] typeIds = new int[children.size()];
237+
for (ValueVector v : children) {
238+
typeIds[childFields.size()] = v.getMinorType().ordinal();
236239
childFields.add(v.getField());
237240
}
238-
return new Field(name, true, new ArrowType.Union(Sparse), childFields);
241+
return new Field(name, true, new ArrowType.Union(Sparse, typeIds), childFields);
239242
}
240243
241244
@Override

vector/src/main/java/org/apache/arrow/vector/types/Types.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) {
472472
return new UnionListWriter((ListVector) vector);
473473
}
474474
},
475-
UNION(new Union(UnionMode.Sparse)) {
475+
UNION(new Union(UnionMode.Sparse, null)) {
476476
@Override
477477
public Field getField() {
478478
throw new UnsupportedOperationException("Cannot get simple field for Union type");

vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@
2222
import static org.junit.Assert.assertEquals;
2323

2424
import org.apache.arrow.flatbuf.UnionMode;
25+
import org.apache.arrow.vector.types.Types.MinorType;
2526
import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
2627
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
2728
import org.apache.arrow.vector.types.pojo.ArrowType.List;
28-
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
2929
import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
30+
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
3031
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
3132
import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
3233
import org.apache.arrow.vector.types.pojo.Field;
@@ -78,7 +79,7 @@ public void nestedSchema() {
7879
childrenBuilder.add(new Field("child4", true, new List(), ImmutableList.<Field>of(
7980
new Field("child4.1", true, Utf8.INSTANCE, null)
8081
)));
81-
childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse), ImmutableList.<Field>of(
82+
childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse, new int[] { MinorType.TIMESTAMP.ordinal(), MinorType.FLOAT8.ordinal() } ), ImmutableList.<Field>of(
8283
new Field("child5.1", true, new Timestamp("UTC"), null),
8384
new Field("child5.2", true, new FloatingPoint(DOUBLE), ImmutableList.<Field>of())
8485
)));

0 commit comments

Comments
 (0)