Skip to content

Commit e34e87f

Browse files
authored
HIVE-29310: Type casting issue in variant_get UDF (#6176)
1 parent 7003dc9 commit e34e87f

File tree

3 files changed

+171
-13
lines changed

3 files changed

+171
-13
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- Mask random uuid
2+
--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
3+
-- Mask random snapshot id
4+
--! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/
5+
-- Mask current-snapshot-timestamp-ms
6+
--! qt:replace:/('current-snapshot-timestamp-ms'=')\d+/$1#Masked#/
7+
8+
-- SORT_QUERY_RESULTS
9+
set hive.explain.user=false;
10+
set hive.fetch.task.conversion=none;
11+
12+
CREATE EXTERNAL TABLE variant_filter_basic (
13+
id BIGINT,
14+
data VARIANT
15+
) STORED BY ICEBERG tblproperties('format-version'='3');
16+
17+
INSERT INTO variant_filter_basic VALUES
18+
(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
19+
(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
20+
(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'));
21+
22+
SELECT
23+
try_variant_get(data, '$.name') AS name,
24+
try_variant_get(data, '$.age', 'int') AS age,
25+
try_variant_get(data, '$.address.city') AS city
26+
FROM variant_filter_basic;
27+
28+
SELECT
29+
try_variant_get(data, '$.name') AS name,
30+
try_variant_get(data, '$.age', 'int') AS age,
31+
try_variant_get(data, '$.address.city') AS city
32+
FROM variant_filter_basic
33+
WHERE try_variant_get(data, '$.age', 'int') >= 30;
34+
35+
EXPLAIN SELECT
36+
try_variant_get(data, '$.name') AS name,
37+
try_variant_get(data, '$.age', 'int') AS age,
38+
try_variant_get(data, '$.address.city') AS city
39+
FROM variant_filter_basic
40+
WHERE try_variant_get(data, '$.age', 'int') >= 30;
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
PREHOOK: query: CREATE EXTERNAL TABLE variant_filter_basic (
2+
id BIGINT,
3+
data VARIANT
4+
) STORED BY ICEBERG tblproperties('format-version'='3')
5+
PREHOOK: type: CREATETABLE
6+
PREHOOK: Output: database:default
7+
PREHOOK: Output: default@variant_filter_basic
8+
POSTHOOK: query: CREATE EXTERNAL TABLE variant_filter_basic (
9+
id BIGINT,
10+
data VARIANT
11+
) STORED BY ICEBERG tblproperties('format-version'='3')
12+
POSTHOOK: type: CREATETABLE
13+
POSTHOOK: Output: database:default
14+
POSTHOOK: Output: default@variant_filter_basic
15+
PREHOOK: query: INSERT INTO variant_filter_basic VALUES
16+
(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
17+
(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
18+
(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'))
19+
PREHOOK: type: QUERY
20+
PREHOOK: Input: _dummy_database@_dummy_table
21+
PREHOOK: Output: default@variant_filter_basic
22+
POSTHOOK: query: INSERT INTO variant_filter_basic VALUES
23+
(1, parse_json('{ "name": "Alice", "age": 30, "address": {"city": "Wonderland"} }')),
24+
(2, parse_json('{ "name": "Bob", "age": 40, "address": {"city": "Builderland"} }')),
25+
(3, parse_json('{ "name": "Charlie", "age": 28, "address": {"city": "Dreamtown"} }'))
26+
POSTHOOK: type: QUERY
27+
POSTHOOK: Input: _dummy_database@_dummy_table
28+
POSTHOOK: Output: default@variant_filter_basic
29+
PREHOOK: query: SELECT
30+
try_variant_get(data, '$.name') AS name,
31+
try_variant_get(data, '$.age', 'int') AS age,
32+
try_variant_get(data, '$.address.city') AS city
33+
FROM variant_filter_basic
34+
PREHOOK: type: QUERY
35+
PREHOOK: Input: default@variant_filter_basic
36+
PREHOOK: Output: hdfs://### HDFS PATH ###
37+
POSTHOOK: query: SELECT
38+
try_variant_get(data, '$.name') AS name,
39+
try_variant_get(data, '$.age', 'int') AS age,
40+
try_variant_get(data, '$.address.city') AS city
41+
FROM variant_filter_basic
42+
POSTHOOK: type: QUERY
43+
POSTHOOK: Input: default@variant_filter_basic
44+
POSTHOOK: Output: hdfs://### HDFS PATH ###
45+
Alice 30 Wonderland
46+
Bob 40 Builderland
47+
Charlie 28 Dreamtown
48+
PREHOOK: query: SELECT
49+
try_variant_get(data, '$.name') AS name,
50+
try_variant_get(data, '$.age', 'int') AS age,
51+
try_variant_get(data, '$.address.city') AS city
52+
FROM variant_filter_basic
53+
WHERE try_variant_get(data, '$.age', 'int') >= 30
54+
PREHOOK: type: QUERY
55+
PREHOOK: Input: default@variant_filter_basic
56+
PREHOOK: Output: hdfs://### HDFS PATH ###
57+
POSTHOOK: query: SELECT
58+
try_variant_get(data, '$.name') AS name,
59+
try_variant_get(data, '$.age', 'int') AS age,
60+
try_variant_get(data, '$.address.city') AS city
61+
FROM variant_filter_basic
62+
WHERE try_variant_get(data, '$.age', 'int') >= 30
63+
POSTHOOK: type: QUERY
64+
POSTHOOK: Input: default@variant_filter_basic
65+
POSTHOOK: Output: hdfs://### HDFS PATH ###
66+
Alice 30 Wonderland
67+
Bob 40 Builderland
68+
PREHOOK: query: EXPLAIN SELECT
69+
try_variant_get(data, '$.name') AS name,
70+
try_variant_get(data, '$.age', 'int') AS age,
71+
try_variant_get(data, '$.address.city') AS city
72+
FROM variant_filter_basic
73+
WHERE try_variant_get(data, '$.age', 'int') >= 30
74+
PREHOOK: type: QUERY
75+
PREHOOK: Input: default@variant_filter_basic
76+
PREHOOK: Output: hdfs://### HDFS PATH ###
77+
POSTHOOK: query: EXPLAIN SELECT
78+
try_variant_get(data, '$.name') AS name,
79+
try_variant_get(data, '$.age', 'int') AS age,
80+
try_variant_get(data, '$.address.city') AS city
81+
FROM variant_filter_basic
82+
WHERE try_variant_get(data, '$.age', 'int') >= 30
83+
POSTHOOK: type: QUERY
84+
POSTHOOK: Input: default@variant_filter_basic
85+
POSTHOOK: Output: hdfs://### HDFS PATH ###
86+
STAGE DEPENDENCIES:
87+
Stage-1 is a root stage
88+
Stage-0 depends on stages: Stage-1
89+
90+
STAGE PLANS:
91+
Stage: Stage-1
92+
Tez
93+
#### A masked pattern was here ####
94+
Vertices:
95+
Map 1
96+
Map Operator Tree:
97+
TableScan
98+
alias: variant_filter_basic
99+
filterExpr: (try_variant_get(data, '$.age', 'int') >= 30) (type: boolean)
100+
Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: NONE
101+
Filter Operator
102+
predicate: (try_variant_get(data, '$.age', 'int') >= 30) (type: boolean)
103+
Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
104+
Select Operator
105+
expressions: try_variant_get(data, '$.name') (type: string), try_variant_get(data, '$.age', 'int') (type: int), try_variant_get(data, '$.address.city') (type: string)
106+
outputColumnNames: _col0, _col1, _col2
107+
Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
108+
File Output Operator
109+
compressed: false
110+
Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
111+
table:
112+
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
113+
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
114+
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
115+
Execution mode: vectorized
116+
117+
Stage: Stage-0
118+
Fetch Operator
119+
limit: -1
120+
Processor Tree:
121+
ListSink
122+

ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFVariantGet.java

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
2727
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
2828
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
29+
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
30+
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector;
2931
import org.apache.hadoop.hive.serde2.variant.Variant;
3032
import org.apache.hadoop.hive.serde2.variant.VariantUtil;
3133
import org.slf4j.Logger;
@@ -58,8 +60,7 @@ public class GenericUDFVariantGet extends GenericUDF {
5860
private StructObjectInspector variantOI;
5961
private PrimitiveObjectInspector pathOI;
6062

61-
private PrimitiveObjectInspector typeOI;
62-
private boolean hasTypeArgument;
63+
private String targetType;
6364

6465
@Override
6566
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -77,12 +78,15 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
7778
}
7879
pathOI = (PrimitiveObjectInspector) arguments[1];
7980

80-
hasTypeArgument = arguments.length == 3;
81+
boolean hasTypeArgument = arguments.length == 3;
8182
if (hasTypeArgument) {
82-
if (!(arguments[2] instanceof PrimitiveObjectInspector)) {
83+
if (!(arguments[2] instanceof WritableConstantStringObjectInspector typeOI)) {
8384
throw new UDFArgumentException("Third argument must be string type name");
8485
}
85-
typeOI = (PrimitiveObjectInspector) arguments[2];
86+
targetType = typeOI.getWritableConstantValue().toString();
87+
88+
return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
89+
PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(targetType).primitiveCategory);
8690
}
8791

8892
return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
@@ -103,14 +107,6 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException {
103107
}
104108
String path = pathOI.getPrimitiveJavaObject(pathObj).toString();
105109

106-
String targetType = null;
107-
if (hasTypeArgument) {
108-
Object typeObj = arguments[2].get();
109-
if (typeObj != null) {
110-
targetType = typeOI.getPrimitiveJavaObject(typeObj).toString();
111-
}
112-
}
113-
114110
Variant result = extractValueByPath(variant, path);
115111
// cast to target type
116112
return castValue(result, targetType);

0 commit comments

Comments
 (0)