Skip to content

Commit 929dfae

Browse files
committed
[DX] Spark SQL: support the GeometryUDT type.
1 parent c4e1106 commit 929dfae

File tree

26 files changed

+522
-8
lines changed

26 files changed

+522
-8
lines changed

api/src/main/java/org/apache/iceberg/types/Type.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ enum TypeID {
4545
DECIMAL(BigDecimal.class),
4646
STRUCT(StructLike.class),
4747
LIST(List.class),
48-
MAP(Map.class);
48+
MAP(Map.class),
49+
GEOMETRY(Types.GeometryType.class);
4950

5051
private final Class<?> javaClass;
5152

api/src/main/java/org/apache/iceberg/types/TypeUtil.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,10 @@ public T map(Types.MapType map, T keyResult, T valueResult) {
615615
public T primitive(Type.PrimitiveType primitive) {
616616
return null;
617617
}
618+
619+
public T geometry(Type type) {
620+
return null;
621+
}
618622
}
619623

620624
public static <T> T visit(Schema schema, SchemaVisitor<T> visitor) {
@@ -675,6 +679,9 @@ public static <T> T visit(Type type, SchemaVisitor<T> visitor) {
675679

676680
return visitor.map(map, keyResult, valueResult);
677681

682+
case GEOMETRY:
683+
return visitor.geometry(type);
684+
678685
default:
679686
return visitor.primitive(type.asPrimitiveType());
680687
}

api/src/main/java/org/apache/iceberg/types/Types.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ private Types() {}
5555
.put(StringType.get().toString(), StringType.get())
5656
.put(UUIDType.get().toString(), UUIDType.get())
5757
.put(BinaryType.get().toString(), BinaryType.get())
58+
.put(GeometryType.get().toString().toLowerCase(Locale.ROOT), GeometryType.get())
5859
.buildOrThrow();
5960

6061
private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]");
@@ -1053,4 +1054,22 @@ private List<NestedField> lazyFieldList() {
10531054
return fields;
10541055
}
10551056
}
1057+
1058+
public static class GeometryType extends PrimitiveType {
1059+
private static final GeometryType INSTANCE = new GeometryType();
1060+
1061+
public static GeometryType get() {
1062+
return INSTANCE;
1063+
}
1064+
1065+
@Override
1066+
public TypeID typeId() {
1067+
return TypeID.GEOMETRY;
1068+
}
1069+
1070+
@Override
1071+
public String toString() {
1072+
return "Geometry";
1073+
}
1074+
}
10561075
}

arrow/src/main/java/org/apache/iceberg/arrow/ArrowSchemaUtil.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ public static Field convert(final NestedField field) {
6666

6767
switch (field.type().typeId()) {
6868
case BINARY:
69+
case GEOMETRY:
6970
arrowType = ArrowType.Binary.INSTANCE;
7071
break;
7172
case FIXED:

arrow/src/test/java/org/apache/iceberg/arrow/ArrowSchemaUtilTest.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public class ArrowSchemaUtilTest {
5656
private static final String LIST_FIELD = "lt";
5757
private static final String MAP_FIELD = "mt";
5858
private static final String UUID_FIELD = "uu";
59+
private static final String GEOM_FIELD = "geom";
5960

6061
@Test
6162
public void convertPrimitive() {
@@ -79,7 +80,8 @@ public void convertPrimitive() {
7980
MAP_FIELD,
8081
Types.MapType.ofOptional(15, 16, StringType.get(), IntegerType.get())),
8182
Types.NestedField.optional(17, FIXED_WIDTH_BINARY_FIELD, Types.FixedType.ofLength(10)),
82-
Types.NestedField.optional(18, UUID_FIELD, Types.UUIDType.get()));
83+
Types.NestedField.optional(18, UUID_FIELD, Types.UUIDType.get()),
84+
Types.NestedField.optional(19, GEOM_FIELD, Types.GeometryType.get()));
8385

8486
org.apache.arrow.vector.types.pojo.Schema arrow = ArrowSchemaUtil.convert(iceberg);
8587

@@ -179,6 +181,10 @@ private void validate(Type iceberg, Field field, boolean optional) {
179181
assertThat(field.getName()).isEqualTo(UUID_FIELD);
180182
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.FixedSizeBinary.TYPE_TYPE);
181183
break;
184+
case GEOMETRY:
185+
assertThat(field.getName()).isEqualTo(GEOM_FIELD);
186+
assertThat(arrowType.getTypeID()).isEqualTo(ArrowType.Binary.TYPE_TYPE);
187+
break;
182188
default:
183189
throw new UnsupportedOperationException("Check not implemented for type: " + iceberg);
184190
}

hive-metastore/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ private static String convertToTypeString(Type type) {
166166
return "timestamp";
167167
case FIXED:
168168
case BINARY:
169+
case GEOMETRY:
169170
return "binary";
170171
case DECIMAL:
171172
final Types.DecimalType decimalType = (Types.DecimalType) type;

orc/src/main/java/org/apache/iceberg/orc/ExpressionToSearchArgument.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,13 @@ static SearchArgument convert(Expression expr, TypeDescription readSchema) {
5858
// these Iceberg types
5959
private static final Set<TypeID> UNSUPPORTED_TYPES =
6060
ImmutableSet.of(
61-
TypeID.BINARY, TypeID.FIXED, TypeID.UUID, TypeID.STRUCT, TypeID.MAP, TypeID.LIST);
61+
TypeID.BINARY,
62+
TypeID.FIXED,
63+
TypeID.UUID,
64+
TypeID.STRUCT,
65+
TypeID.MAP,
66+
TypeID.LIST,
67+
TypeID.GEOMETRY);
6268

6369
private final SearchArgument.Builder builder;
6470
private final Map<Integer, String> idToColumnName;

orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ public final class ORCSchemaUtil {
4040
public enum BinaryType {
4141
UUID,
4242
FIXED,
43-
BINARY
43+
BINARY,
44+
GEOMETRY
4445
}
4546

4647
public enum LongType {
@@ -101,6 +102,7 @@ public TypeDescription type() {
101102
.put(Type.TypeID.FIXED, TypeDescription.Category.BINARY)
102103
.put(Type.TypeID.BINARY, TypeDescription.Category.BINARY)
103104
.put(Type.TypeID.DECIMAL, TypeDescription.Category.DECIMAL)
105+
.put(Type.TypeID.GEOMETRY, TypeDescription.Category.BINARY)
104106
.build();
105107

106108
private ORCSchemaUtil() {}
@@ -202,6 +204,10 @@ private static TypeDescription convert(Integer fieldId, Type type, boolean isReq
202204
orcType = TypeDescription.createMap(keyType, valueType);
203205
break;
204206
}
207+
case GEOMETRY:
208+
orcType = TypeDescription.createBinary();
209+
orcType.setAttribute(ICEBERG_BINARY_TYPE_ATTRIBUTE, BinaryType.GEOMETRY.toString());
210+
break;
205211
default:
206212
throw new IllegalArgumentException("Unhandled type " + type.typeId());
207213
}

orc/src/main/java/org/apache/iceberg/orc/OrcToIcebergVisitor.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,10 @@ public Optional<Types.NestedField> primitive(TypeDescription primitive) {
160160
case BINARY:
161161
foundField = Types.NestedField.of(icebergID, isOptional, name, Types.BinaryType.get());
162162
break;
163+
case GEOMETRY:
164+
foundField =
165+
Types.NestedField.of(icebergID, isOptional, name, Types.GeometryType.get());
166+
break;
163167
default:
164168
throw new IllegalStateException("Invalid Binary type found in ORC type attribute");
165169
}

orc/src/test/java/org/apache/iceberg/orc/TestEstimateOrcAvgWidthVisitor.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ public class TestEstimateOrcAvgWidthVisitor {
7272
required(24, "booleanField", Types.BooleanType.get()),
7373
optional(25, "date", Types.DateType.get()),
7474
optional(27, "timestamp", Types.TimestampType.withZone())));
75+
protected static final Types.NestedField GEOM_FIELD =
76+
optional(24, "geometry", Types.GeometryType.get());
7577

7678
@Test
7779
public void testEstimateIntegerWidth() {
@@ -225,6 +227,16 @@ public void testEstimateFullWidth() {
225227
.isEqualTo(611);
226228
}
227229

230+
@Test
231+
public void testEstimateGeometryWidth() {
232+
Schema geomSchema = new Schema(GEOM_FIELD);
233+
TypeDescription geomOrcSchema = ORCSchemaUtil.convert(geomSchema);
234+
long estimateLength = getEstimateLength(geomOrcSchema);
235+
assertThat(estimateLength)
236+
.as("Estimated average length of geometry must be 128.")
237+
.isEqualTo(128);
238+
}
239+
228240
private Integer getEstimateLength(TypeDescription orcSchemaWithDate) {
229241
return OrcSchemaVisitor.visitSchema(orcSchemaWithDate, new EstimateOrcAvgWidthVisitor())
230242
.stream()

0 commit comments

Comments
 (0)