Skip to content

Commit 1cf4cdd

Browse files
Mike Pigott authored and xhochy committed
ARROW-3966 [Java] JDBC Column Metadata in Arrow Field Metadata
https://issues.apache.org/jira/browse/ARROW-3966 This change includes apache#3133, and supports a new configuration item called "Include Metadata." If true, metadata from the JDBC ResultSetMetaData object is pulled along to the Schema Field Metadata. For now, this includes: * Catalog Name * Table Name * Column Name * Column Type Name Author: Mike Pigott <mpigott@gmail.com> Author: Michael Pigott <mikepigott@users.noreply.github.com> Closes apache#3134 from mikepigott/jdbc-column-metadata and squashes the following commits: 02f2f34 <Mike Pigott> ARROW-3966: Picking up lost change to support null calendars. 7049c36 <Mike Pigott> Merge branch 'master' into jdbc-column-metadata e9a9b2b <Michael Pigott> Merge pull request #6 from apache/master 65741a9 <Mike Pigott> ARROW-3966: Code review feedback cc6cc88 <Mike Pigott> ARROW-3966: Using a 1:N loop instead of a 0:N-1 loop for fewer index offsets in code. cfb2ba6 <Mike Pigott> ARROW-3966: Using a helper method for building a UTC calendar with root locale. 2928513 <Mike Pigott> ARROW-3966: Moving the metadata flag assignment into the builder. 69022c2 <Mike Pigott> ARROW-3966: Fixing merge. 4a6de86 <Mike Pigott> Merge branch 'master' into jdbc-column-metadata 509a1cc <Michael Pigott> Merge pull request #5 from apache/master 789c8c8 <Michael Pigott> Merge pull request #4 from apache/master e5b19ee <Michael Pigott> Merge pull request #3 from apache/master 3b17c29 <Michael Pigott> Merge pull request #2 from apache/master d847ebc <Mike Pigott> Fixing file location 1ceac9e <Mike Pigott> Merge branch 'master' into jdbc-column-metadata 881c6c8 <Michael Pigott> Merge pull request #1 from apache/master 03091a8 <Mike Pigott> Unit tests for including result set metadata. 72d64cc <Mike Pigott> Affirming the field metadata is empty when the configuration excludes field metadata. 7b4527c <Mike Pigott> Test for the include-metadata flag in the configuration. 
7e9ce37 <Mike Pigott> Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata bb3165b <Mike Pigott> Updating the function calls to use the JdbcToArrowConfig versions. a6fb1be <Mike Pigott> Fixing function call 5bfd6a2 <Mike Pigott> Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata 68c91e7 <Mike Pigott> Modifying the jdbcToArrowSchema and jdbcToArrowVectors methods to receive JdbcToArrowConfig objects. b5b0cb1 <Mike Pigott> Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata 8d6cf00 <Mike Pigott> Documentation for public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) 4f1260c <Mike Pigott> Adding documentation for public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) e34a9e7 <Mike Pigott> Fixing formatting. fe097c8 <Mike Pigott> Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata df632e3 <Mike Pigott> Updating the SQL tests to include JdbcToArrowConfig versions. b270044 <Mike Pigott> Updated validaton & documentation, and unit tests for the new JdbcToArrowConfig. da77cbe <Mike Pigott> Creating a configuration class for the JDBC-to-Arrow converter. a78c770 <Mike Pigott> Updating Javadocs. 523387f <Mike Pigott> Updating the API to support an optional 'includeMetadata' field. 5af1b5b <Mike Pigott> Separating out the field-type creation from the field creation.
1 parent 5863a9f commit 1cf4cdd

File tree

12 files changed

+295
-42
lines changed

12 files changed

+295
-42
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.arrow.adapter.jdbc;
19+
20+
public class Constants {
21+
22+
public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME";
23+
public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME";
24+
public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME";
25+
public static final String SQL_TYPE_KEY = "SQL_TYPE";
26+
27+
}

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
import java.sql.SQLException;
2424
import java.sql.Statement;
2525
import java.util.Calendar;
26-
import java.util.Locale;
27-
import java.util.TimeZone;
2826

2927
import org.apache.arrow.memory.BaseAllocator;
3028
import org.apache.arrow.memory.RootAllocator;
@@ -90,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
9088
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
9189

9290
JdbcToArrowConfig config =
93-
new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
91+
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
9492
return sqlToArrow(connection, query, config);
9593
}
9694

@@ -112,12 +110,13 @@ public static VectorSchemaRoot sqlToArrow(
112110
String query,
113111
BaseAllocator allocator,
114112
Calendar calendar) throws SQLException, IOException {
113+
115114
Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
116115
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
117116
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
118117
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
119118

120-
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
119+
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false));
121120
}
122121

123122
/**
@@ -154,7 +153,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J
154153
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException {
155154
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
156155

157-
return sqlToArrow(resultSet, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
156+
return sqlToArrow(resultSet, JdbcToArrowUtils.getUtcCalendar());
158157
}
159158

160159
/**
@@ -171,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
171170
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
172171

173172
JdbcToArrowConfig config =
174-
new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
173+
new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false);
175174
return sqlToArrow(resultSet, config);
176175
}
177176

@@ -186,7 +185,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
186185
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
187186
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
188187

189-
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
188+
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false));
190189
}
191190

192191
/**
@@ -198,12 +197,15 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
198197
* @return Arrow Data Objects {@link VectorSchemaRoot}
199198
* @throws SQLException on error
200199
*/
201-
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator, Calendar calendar)
200+
public static VectorSchemaRoot sqlToArrow(
201+
ResultSet resultSet,
202+
BaseAllocator allocator,
203+
Calendar calendar)
202204
throws SQLException, IOException {
203205
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
204206
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
205207

206-
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
208+
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false));
207209
}
208210

209211
/**

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,23 @@
3737
public final class JdbcToArrowConfig {
3838
private Calendar calendar;
3939
private BaseAllocator allocator;
40+
private boolean includeMetadata;
4041

4142
/**
4243
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
4344
* is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
4445
* Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
4546
*
46-
* @param allocator The memory allocator to construct the Arrow vectors with.
47-
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
47+
* @param allocator The memory allocator to construct the Arrow vectors with.
48+
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
49+
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
4850
*/
49-
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
51+
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
5052
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
5153

5254
this.allocator = allocator;
5355
this.calendar = calendar;
56+
this.includeMetadata = includeMetadata;
5457
}
5558

5659
/**
@@ -70,4 +73,13 @@ public Calendar getCalendar() {
7073
public BaseAllocator getAllocator() {
7174
return allocator;
7275
}
76+
77+
/**
78+
* Whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
79+
*
80+
* @return <code>true</code> to include field metadata, <code>false</code> to exclude it.
81+
*/
82+
public boolean shouldIncludeMetadata() {
83+
return includeMetadata;
84+
}
7385
}

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
public class JdbcToArrowConfigBuilder {
3030
private Calendar calendar;
3131
private BaseAllocator allocator;
32+
private boolean includeMetadata;
3233

3334
/**
3435
* Default constructor for the <code>JdbcToArrowConfigBuilder</code>.
@@ -38,6 +39,7 @@ public class JdbcToArrowConfigBuilder {
3839
public JdbcToArrowConfigBuilder() {
3940
this.allocator = null;
4041
this.calendar = null;
42+
this.includeMetadata = false;
4143
}
4244

4345
/**
@@ -62,6 +64,32 @@ public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar) {
6264

6365
this.allocator = allocator;
6466
this.calendar = calendar;
67+
this.includeMetadata = false;
68+
}
69+
70+
/**
71+
* Constructor for the <code>JdbcToArrowConfigBuilder</code>. Both the
72+
* allocator and calendar are required. A {@link NullPointerException}
73+
* will be thrown if either of those arguments is <code>null</code>.
74+
* <p>
75+
* The allocator is used to construct Arrow vectors from the JDBC ResultSet.
76+
* The calendar is used to determine the time zone of {@link java.sql.Timestamp}
77+
* fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and
78+
* {@link java.sql.Timestamp} fields to a single, common time zone when reading
79+
* from the result set.
80+
* </p>
81+
* <p>
82+
* The <code>includeMetadata</code> argument, if <code>true</code> will cause
83+
* various information about each database field to be added to the Vector
84+
* Schema's field metadata.
85+
* </p>
86+
*
87+
* @param allocator The Arrow Vector memory allocator.
88+
* @param calendar The calendar to use when constructing timestamp fields.
89+
*/
90+
public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
91+
this(allocator, calendar);
92+
this.includeMetadata = includeMetadata;
6593
}
6694

6795
/**
@@ -87,6 +115,17 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
87115
return this;
88116
}
89117

118+
/**
119+
* Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
120+
*
121+
* @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata.
122+
* @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining.
123+
*/
124+
public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) {
125+
this.includeMetadata = includeMetadata;
126+
return this;
127+
}
128+
90129
/**
91130
* This builds the {@link JdbcToArrowConfig} from the provided
92131
* {@link BaseAllocator} and {@link Calendar}.
@@ -95,6 +134,6 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
95134
* @throws NullPointerException if the allocator was not set.
96135
*/
97136
public JdbcToArrowConfig build() {
98-
return new JdbcToArrowConfig(allocator, calendar);
137+
return new JdbcToArrowConfig(allocator, calendar, includeMetadata);
99138
}
100139
}

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@
3636
import java.sql.Types;
3737
import java.util.ArrayList;
3838
import java.util.Calendar;
39+
import java.util.HashMap;
3940
import java.util.List;
41+
import java.util.Locale;
42+
import java.util.Map;
43+
import java.util.TimeZone;
4044

4145
import org.apache.arrow.memory.RootAllocator;
4246
import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -103,7 +107,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
103107
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
104108
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
105109

106-
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar));
110+
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar, false));
111+
}
112+
113+
/**
114+
* Returns a {@link java.util.Calendar} with the UTC time zone and root locale.
115+
*/
116+
public static Calendar getUtcCalendar() {
117+
return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
107118
}
108119

109120
/**
@@ -145,78 +156,103 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig
145156
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
146157
Preconditions.checkNotNull(config, "The configuration object must not be null");
147158

159+
final String timezone;
160+
if (config.getCalendar() != null) {
161+
timezone = config.getCalendar().getTimeZone().getID();
162+
} else {
163+
timezone = null;
164+
}
165+
148166
List<Field> fields = new ArrayList<>();
149167
int columnCount = rsmd.getColumnCount();
150168
for (int i = 1; i <= columnCount; i++) {
151-
String columnName = rsmd.getColumnName(i);
169+
final String columnName = rsmd.getColumnName(i);
170+
final FieldType fieldType;
171+
172+
final Map<String, String> metadata;
173+
if (config.shouldIncludeMetadata()) {
174+
metadata = new HashMap<>();
175+
metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i));
176+
metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i));
177+
metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName);
178+
metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i));
179+
180+
} else {
181+
metadata = null;
182+
}
183+
152184
switch (rsmd.getColumnType(i)) {
153185
case Types.BOOLEAN:
154186
case Types.BIT:
155-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Bool()), null));
187+
fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata);
156188
break;
157189
case Types.TINYINT:
158-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(8, true)), null));
190+
fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata);
159191
break;
160192
case Types.SMALLINT:
161-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(16, true)), null));
193+
fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata);
162194
break;
163195
case Types.INTEGER:
164-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null));
196+
fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata);
165197
break;
166198
case Types.BIGINT:
167-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(64, true)), null));
199+
fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata);
168200
break;
169201
case Types.NUMERIC:
170202
case Types.DECIMAL:
171203
int precision = rsmd.getPrecision(i);
172204
int scale = rsmd.getScale(i);
173-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Decimal(precision, scale)), null));
205+
fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata);
174206
break;
175207
case Types.REAL:
176208
case Types.FLOAT:
177-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(SINGLE)), null));
209+
fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata);
178210
break;
179211
case Types.DOUBLE:
180-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(DOUBLE)), null));
212+
fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata);
181213
break;
182214
case Types.CHAR:
183215
case Types.NCHAR:
184216
case Types.VARCHAR:
185217
case Types.NVARCHAR:
186218
case Types.LONGVARCHAR:
187219
case Types.LONGNVARCHAR:
188-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null));
220+
case Types.CLOB:
221+
fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata);
189222
break;
190223
case Types.DATE:
191-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Date(DateUnit.MILLISECOND)), null));
224+
fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata);
192225
break;
193226
case Types.TIME:
194-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32)), null));
227+
fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata);
195228
break;
196229
case Types.TIMESTAMP:
197-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND,
198-
config.getCalendar().getTimeZone().getID())), null));
230+
fieldType =
231+
new FieldType(
232+
true,
233+
new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone),
234+
null,
235+
metadata);
199236
break;
200237
case Types.BINARY:
201238
case Types.VARBINARY:
202239
case Types.LONGVARBINARY:
203-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null));
204-
break;
205-
case Types.ARRAY:
206-
// TODO Need to handle this type
207-
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
208-
break;
209-
case Types.CLOB:
210-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null));
211-
break;
212240
case Types.BLOB:
213-
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null));
241+
fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata);
214242
break;
215243

244+
case Types.ARRAY:
245+
// TODO Need to handle this type
246+
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
216247
default:
217248
// no-op, shouldn't get here
249+
fieldType = null;
218250
break;
219251
}
252+
253+
if (fieldType != null) {
254+
fields.add(new Field(columnName, fieldType, null));
255+
}
220256
}
221257

222258
return new Schema(fields, null);
@@ -250,7 +286,7 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
250286
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
251287
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
252288

253-
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
289+
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false));
254290
}
255291

256292
/**

0 commit comments

Comments
 (0)