Skip to content

Commit 453cbf3

Browse files
imback82 authored and HyukjinKwon committed
[SPARK-28284][SQL][PYTHON][TESTS] Convert and port 'join-empty-relation.sql' into UDF test base
## What changes were proposed in this pull request? This PR adds some tests converted from `join-empty-relation.sql` to test UDFs. Please see contribution guide of this umbrella ticket - [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921). <details><summary>Diff comparing to 'join-empty-relation.sql'</summary> <p> ```diff diff --git a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out index 857073a..e79d01f 100644 --- a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out -27,111 +27,111 struct<> -- !query 3 -SELECT * FROM t1 INNER JOIN empty_table +SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a))) -- !query 3 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int> -- !query 3 output -- !query 4 -SELECT * FROM t1 CROSS JOIN empty_table +SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a)) -- !query 4 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 4 output -- !query 5 -SELECT * FROM t1 LEFT OUTER JOIN empty_table +SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) -- !query 5 schema -struct<a:int,a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,a:int> -- !query 5 output 1 NULL -- !query 6 -SELECT * FROM t1 RIGHT OUTER JOIN empty_table +SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) -- !query 6 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as 
string)) AS INT):int> -- !query 6 output -- !query 7 -SELECT * FROM t1 FULL OUTER JOIN empty_table +SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) -- !query 7 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,a:int> -- !query 7 output 1 NULL -- !query 8 -SELECT * FROM t1 LEFT SEMI JOIN empty_table +SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a))) -- !query 8 schema -struct<a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 8 output -- !query 9 -SELECT * FROM t1 LEFT ANTI JOIN empty_table +SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) -- !query 9 schema -struct<a:int> +struct<CAST(udf(cast(a as string)) AS INT):int> -- !query 9 output 1 -- !query 10 -SELECT * FROM empty_table INNER JOIN t1 +SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a)) -- !query 10 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int> -- !query 10 output -- !query 11 -SELECT * FROM empty_table CROSS JOIN t1 +SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a))) -- !query 11 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 11 output -- !query 12 -SELECT * FROM empty_table LEFT OUTER JOIN t1 +SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) -- !query 12 schema -struct<a:int,a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int> -- !query 12 output -- !query 13 -SELECT * FROM empty_table RIGHT OUTER JOIN t1 +SELECT empty_table.a, udf(t1.a) FROM 
empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) -- !query 13 schema -struct<a:int,a:int> +struct<a:int,CAST(udf(cast(a as string)) AS INT):int> -- !query 13 output NULL 1 -- !query 14 -SELECT * FROM empty_table FULL OUTER JOIN t1 +SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) -- !query 14 schema -struct<a:int,a:int> +struct<a:int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 14 output NULL 1 -- !query 15 -SELECT * FROM empty_table LEFT SEMI JOIN t1 +SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a))) -- !query 15 schema -struct<a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 15 output -- !query 16 -SELECT * FROM empty_table LEFT ANTI JOIN t1 +SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) -- !query 16 schema struct<a:int> -- !query 16 output -139,56 +139,56 struct<a:int> -- !query 17 -SELECT * FROM empty_table INNER JOIN empty_table +SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a))) -- !query 17 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int> -- !query 17 output -- !query 18 -SELECT * FROM empty_table CROSS JOIN empty_table +SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a)) -- !query 18 schema -struct<a:int,a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 18 output -- !query 19 -SELECT * FROM empty_table LEFT OUTER JOIN empty_table +SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) -- !query 19 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS 
INT):int> -- !query 19 output -- !query 20 -SELECT * FROM empty_table RIGHT OUTER JOIN empty_table +SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a))) -- !query 20 schema -struct<a:int,a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 20 output -- !query 21 -SELECT * FROM empty_table FULL OUTER JOIN empty_table +SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) -- !query 21 schema -struct<a:int,a:int> +struct<CAST(udf(cast(a as string)) AS INT):int> -- !query 21 output -- !query 22 -SELECT * FROM empty_table LEFT SEMI JOIN empty_table +SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) -- !query 22 schema -struct<a:int> +struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int> -- !query 22 output -- !query 23 -SELECT * FROM empty_table LEFT ANTI JOIN empty_table +SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) -- !query 23 schema -struct<a:int> +struct<CAST(udf(cast(a as string)) AS INT):int> -- !query 23 output ``` </p> </details> ## How was this patch tested? Tested as guided in [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921). Closes #25127 from imback82/join-empty-relation-sql. Authored-by: Terry Kim <yuminkim@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
1 parent 52ddf03 commit 453cbf3

File tree

2 files changed

+229
-0
lines changed

2 files changed

+229
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
-- List of configurations the test suite is run against:
2+
--SET spark.sql.autoBroadcastJoinThreshold=10485760
3+
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
4+
--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
5+
6+
-- This test file was converted from join-empty-relation.sql.
7+
8+
-- Fixture views shared by every query below.
-- t1 and t2 are each built from an inline VALUES table holding the single row (1);
-- the inline table is aliased GROUPING and its only column is named a.
CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a);
9+
CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a);
10+
11+
-- empty_table exposes t2's column a, but the constant WHERE false predicate
-- rejects every row, so the view is always empty.
CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false;
12+
13+
-- Case 1: non-empty t1 on the left, empty_table on the right.
-- One query per join type: INNER, CROSS, LEFT OUTER, RIGHT OUTER, FULL OUTER,
-- LEFT SEMI, LEFT ANTI. Columns in the select list and in the ON condition are
-- wrapped in udf(...), with the nesting depth varied from query to query.
-- Because the right side has no rows, only the join types that preserve
-- unmatched left rows (LEFT OUTER, FULL OUTER, LEFT ANTI) return the t1 row;
-- the other four return nothing.
SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)));
14+
SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a));
15+
SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
16+
SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
17+
SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
18+
SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)));
19+
SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a));
20+
21+
-- Case 2: empty_table on the left, non-empty t1 on the right.
-- Same seven join types as the previous group, with the operands swapped.
-- Because the left side has no rows, only the join types that preserve
-- unmatched right rows (RIGHT OUTER, FULL OUTER) return the t1 row; every
-- left-driven join type, including LEFT ANTI, returns nothing.
SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a));
22+
SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)));
23+
SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
24+
SELECT empty_table.a, udf(t1.a) FROM empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
25+
SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
26+
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)));
27+
SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a));
28+
29+
-- Case 3: both sides empty. empty_table is joined with itself; the right-hand
-- occurrence is aliased empty_table2 so the ON condition can reference each
-- side unambiguously. Only the left side's column is selected.
-- With no rows on either side, every one of the seven join types returns nothing.
SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)));
30+
SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a));
31+
SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
32+
SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)));
33+
SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
34+
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
35+
SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a));
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 24
3+
4+
5+
-- !query 0
6+
CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a)
7+
-- !query 0 schema
8+
struct<>
9+
-- !query 0 output
10+
11+
12+
13+
-- !query 1
14+
CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a)
15+
-- !query 1 schema
16+
struct<>
17+
-- !query 1 output
18+
19+
20+
21+
-- !query 2
22+
CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false
23+
-- !query 2 schema
24+
struct<>
25+
-- !query 2 output
26+
27+
28+
29+
-- !query 3
30+
SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)))
31+
-- !query 3 schema
32+
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
33+
-- !query 3 output
34+
35+
36+
37+
-- !query 4
38+
SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a))
39+
-- !query 4 schema
40+
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
41+
-- !query 4 output
42+
43+
44+
45+
-- !query 5
46+
SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
47+
-- !query 5 schema
48+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,a:int>
49+
-- !query 5 output
50+
1 NULL
51+
52+
53+
-- !query 6
54+
SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
55+
-- !query 6 schema
56+
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
57+
-- !query 6 output
58+
59+
60+
61+
-- !query 7
62+
SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
63+
-- !query 7 schema
64+
struct<CAST(udf(cast(a as string)) AS INT):int,a:int>
65+
-- !query 7 output
66+
1 NULL
67+
68+
69+
-- !query 8
70+
SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a)))
71+
-- !query 8 schema
72+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
73+
-- !query 8 output
74+
75+
76+
77+
-- !query 9
78+
SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a))
79+
-- !query 9 schema
80+
struct<CAST(udf(cast(a as string)) AS INT):int>
81+
-- !query 9 output
82+
1
83+
84+
85+
-- !query 10
86+
SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a))
87+
-- !query 10 schema
88+
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
89+
-- !query 10 output
90+
91+
92+
93+
-- !query 11
94+
SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)))
95+
-- !query 11 schema
96+
struct<CAST(udf(cast(a as string)) AS INT):int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
97+
-- !query 11 output
98+
99+
100+
101+
-- !query 12
102+
SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
103+
-- !query 12 schema
104+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int,CAST(udf(cast(a as string)) AS INT):int>
105+
-- !query 12 output
106+
107+
108+
109+
-- !query 13
110+
SELECT empty_table.a, udf(t1.a) FROM empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
111+
-- !query 13 schema
112+
struct<a:int,CAST(udf(cast(a as string)) AS INT):int>
113+
-- !query 13 output
114+
NULL 1
115+
116+
117+
-- !query 14
118+
SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
119+
-- !query 14 schema
120+
struct<a:int,CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
121+
-- !query 14 output
122+
NULL 1
123+
124+
125+
-- !query 15
126+
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a)))
127+
-- !query 15 schema
128+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
129+
-- !query 15 output
130+
131+
132+
133+
-- !query 16
134+
SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a))
135+
-- !query 16 schema
136+
struct<a:int>
137+
-- !query 16 output
138+
139+
140+
141+
-- !query 17
142+
SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)))
143+
-- !query 17 schema
144+
struct<CAST(udf(cast(a as string)) AS INT):int>
145+
-- !query 17 output
146+
147+
148+
149+
-- !query 18
150+
SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a))
151+
-- !query 18 schema
152+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
153+
-- !query 18 output
154+
155+
156+
157+
-- !query 19
158+
SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
159+
-- !query 19 schema
160+
struct<CAST(udf(cast(a as string)) AS INT):int>
161+
-- !query 19 output
162+
163+
164+
165+
-- !query 20
166+
SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a)))
167+
-- !query 20 schema
168+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
169+
-- !query 20 output
170+
171+
172+
173+
-- !query 21
174+
SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
175+
-- !query 21 schema
176+
struct<CAST(udf(cast(a as string)) AS INT):int>
177+
-- !query 21 output
178+
179+
180+
181+
-- !query 22
182+
SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
183+
-- !query 22 schema
184+
struct<CAST(udf(cast(cast(udf(cast(a as string)) as int) as string)) AS INT):int>
185+
-- !query 22 output
186+
187+
188+
189+
-- !query 23
190+
SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a))
191+
-- !query 23 schema
192+
struct<CAST(udf(cast(a as string)) AS INT):int>
193+
-- !query 23 output
194+

0 commit comments

Comments
 (0)