Skip to content

Commit e674909

Browse files
DylanGuedesHyukjinKwon
authored and committed
[SPARK-29107][SQL][TESTS] Port window.sql (Part 1)
### What changes were proposed in this pull request? This PR ports lines 1~319 of window.sql from the PostgreSQL regression tests: https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql The expected results can be found at: https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/expected/window.out ### Why are the changes needed? To ensure compatibility with PostgreSQL. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Passed Jenkins, and the results were compared with the PostgreSQL results. Closes #26119 from DylanGuedes/spark-29107. Authored-by: DylanGuedes <djmgguedes@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
1 parent f23c5d7 commit e674909

File tree

2 files changed

+1077
-0
lines changed

2 files changed

+1077
-0
lines changed
Lines changed: 352 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,352 @@
1+
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
2+
--
3+
-- Window Functions Testing
4+
-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L1-L319
5+
6+
-- tenk2 is a temporary view exposing every column of tenk1.
-- tenk1 is not created in this script (presumably supplied by the test harness -- confirm).
-- tenk2 is dropped again at the end of the script.
CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1;
7+
8+
-- [SPARK-29540] Thrift in some cases can't parse string to date
9+
-- CREATE TABLE empsalary (
10+
-- depname string,
11+
-- empno integer,
12+
-- salary int,
13+
-- enroll_date date
14+
-- ) USING parquet;
15+
16+
-- [SPARK-29540] Thrift in some cases can't parse string to date
17+
-- INSERT INTO empsalary VALUES ('develop', 10, 5200, '2007-08-01');
18+
-- INSERT INTO empsalary VALUES ('sales', 1, 5000, '2006-10-01');
19+
-- INSERT INTO empsalary VALUES ('personnel', 5, 3500, '2007-12-10');
20+
-- INSERT INTO empsalary VALUES ('sales', 4, 4800, '2007-08-08');
21+
-- INSERT INTO empsalary VALUES ('personnel', 2, 3900, '2006-12-23');
22+
-- INSERT INTO empsalary VALUES ('develop', 7, 4200, '2008-01-01');
23+
-- INSERT INTO empsalary VALUES ('develop', 9, 4500, '2008-01-01');
24+
-- INSERT INTO empsalary VALUES ('sales', 3, 4800, '2007-08-01');
25+
-- INSERT INTO empsalary VALUES ('develop', 8, 6000, '2006-10-01');
26+
-- INSERT INTO empsalary VALUES ('develop', 11, 5200, '2007-08-15');
27+
28+
-- [SPARK-29540] Thrift in some cases can't parse string to date
29+
-- SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary;
30+
31+
-- [SPARK-29540] Thrift in some cases can't parse string to date
32+
-- SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary;
33+
34+
-- with GROUP BY
35+
SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1
36+
GROUP BY four, ten ORDER BY four, ten;
37+
38+
-- [SPARK-29540] Thrift in some cases can't parse string to date
39+
-- SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname);
40+
41+
-- [SPARK-28064] Order by does not accept a call to rank()
42+
-- SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w;
43+
44+
-- empty window specification
45+
SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10;
46+
47+
SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS ();
48+
49+
-- no window operation
50+
SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten);
51+
52+
-- cumulative aggregate
53+
SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10;
54+
55+
SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10;
56+
57+
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10;
58+
59+
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
60+
61+
SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
62+
63+
SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
64+
65+
SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 < 10;
66+
67+
-- [SPARK-28065] ntile does not accept NULL as input
68+
-- SELECT ntile(NULL) OVER (ORDER BY ten, four), ten, four FROM tenk1 LIMIT 2;
69+
70+
SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
71+
72+
-- [SPARK-28068] `lag` second argument must be a literal in Spark
73+
-- SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
74+
75+
-- [SPARK-28068] `lag` second argument must be a literal in Spark
76+
-- SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
77+
78+
SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
79+
80+
SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
81+
82+
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
83+
84+
SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
85+
86+
-- last returns the last row of the frame, which is CURRENT ROW in ORDER BY window.
87+
SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10;
88+
89+
SELECT last(ten) OVER (PARTITION BY four), ten, four FROM
90+
(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s
91+
ORDER BY four, ten;
92+
93+
-- [SPARK-27951] ANSI SQL: NTH_VALUE function
94+
-- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four
95+
-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s;
96+
97+
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY two ORDER BY ten) AS wsum
98+
FROM tenk1 GROUP BY ten, two;
99+
100+
SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10;
101+
102+
SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) +
103+
sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum
104+
FROM tenk1 WHERE unique2 < 10;
105+
106+
-- opexpr with different windows evaluation.
107+
SELECT * FROM(
108+
SELECT count(*) OVER (PARTITION BY four ORDER BY ten) +
109+
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total,
110+
count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount,
111+
sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS twosum
112+
FROM tenk1
113+
)sub WHERE total <> fourcount + twosum;
114+
115+
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10;
116+
117+
SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum
118+
FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten);
119+
120+
-- [SPARK-29540] Thrift in some cases can't parse string to date
121+
-- more than one window with GROUP BY
122+
-- SELECT sum(salary),
123+
-- row_number() OVER (ORDER BY depname),
124+
-- sum(sum(salary)) OVER (ORDER BY depname DESC)
125+
-- FROM empsalary GROUP BY depname;
126+
127+
-- [SPARK-29540] Thrift in some cases can't parse string to date
128+
-- identical windows with different names
129+
-- SELECT sum(salary) OVER w1, count(*) OVER w2
130+
-- FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary);
131+
132+
-- subplan
133+
-- [SPARK-28379] Correlated scalar subqueries must be aggregated
134+
-- SELECT lead(ten, (SELECT two FROM tenk1 WHERE s.unique2 = unique2)) OVER (PARTITION BY four ORDER BY ten)
135+
-- FROM tenk1 s WHERE unique2 < 10;
136+
137+
-- empty table
138+
SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s;
139+
140+
-- [SPARK-29540] Thrift in some cases can't parse string to date
141+
-- mixture of agg/wfunc in the same window
142+
-- SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
143+
144+
-- Cannot safely cast 'enroll_date': StringType to DateType;
145+
-- SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM(
146+
-- SELECT *,
147+
-- CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus,
148+
-- CASE WHEN
149+
-- AVG(salary) OVER (PARTITION BY depname) < salary
150+
-- THEN 200 END AS depadj FROM empsalary
151+
-- )s;
152+
153+
-- Inline five-row integer fixture exposed as int4_tbl with a single column f1.
-- It is queried by the "ungrouped agg over empty row set" test that follows
-- and dropped at the end of the script.
create temporary view int4_tbl as select * from values
154+
(0),
155+
(123456),
156+
(-123456),
157+
(2147483647),
158+
(-2147483647)
159+
as int4_tbl(f1);
160+
161+
-- window function over ungrouped agg over empty row set (bug before 9.1)
162+
SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42;
163+
164+
-- window function with ORDER BY an expression involving aggregates (9.1 bug)
165+
select ten,
166+
sum(unique1) + sum(unique2) as res,
167+
rank() over (order by sum(unique1) + sum(unique2)) as rank
168+
from tenk1
169+
group by ten order by ten;
170+
171+
-- window and aggregate with GROUP BY expression (9.2 bug)
172+
-- explain
173+
-- select first(max(x)) over (), y
174+
-- from (select unique1 as x, ten+four as y from tenk1) ss
175+
-- group by y;
176+
177+
-- test non-default frame specifications
178+
SELECT four, ten,
179+
sum(ten) over (partition by four order by ten),
180+
last(ten) over (partition by four order by ten)
181+
FROM (select distinct ten, four from tenk1) ss;
182+
183+
SELECT four, ten,
184+
sum(ten) over (partition by four order by ten range between unbounded preceding and current row),
185+
last(ten) over (partition by four order by ten range between unbounded preceding and current row)
186+
FROM (select distinct ten, four from tenk1) ss;
187+
188+
SELECT four, ten,
189+
sum(ten) over (partition by four order by ten range between unbounded preceding and unbounded following),
190+
last(ten) over (partition by four order by ten range between unbounded preceding and unbounded following)
191+
FROM (select distinct ten, four from tenk1) ss;
192+
193+
-- [SPARK-29451] Some queries with divisions in SQL windows are failing in Thrift
194+
-- SELECT four, ten/4 as two,
195+
-- sum(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row),
196+
-- last(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row)
197+
-- FROM (select distinct ten, four from tenk1) ss;
198+
199+
-- [SPARK-29451] Some queries with divisions in SQL windows are failing in Thrift
200+
-- SELECT four, ten/4 as two,
201+
-- sum(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row),
202+
-- last(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row)
203+
-- FROM (select distinct ten, four from tenk1) ss;
204+
205+
-- RANGE frame, ordered by four, running from the current row through
-- unbounded following.
SELECT sum(unique1) over (order by four range between current row and unbounded following),
206+
unique1, four
207+
FROM tenk1 WHERE unique1 < 10;
208+
209+
-- ROWS frame running from the current row through unbounded following.
-- NOTE(review): the window has no ORDER BY, so the per-row sums presumably
-- depend on the order in which rows reach the window operator -- confirm.
SELECT sum(unique1) over (rows between current row and unbounded following),
210+
unique1, four
211+
FROM tenk1 WHERE unique1 < 10;
212+
213+
-- ROWS frame covering two rows before through two rows after the current row.
-- NOTE(review): no ORDER BY in the window here either -- same caveat as above.
SELECT sum(unique1) over (rows between 2 preceding and 2 following),
214+
unique1, four
215+
FROM tenk1 WHERE unique1 < 10;
216+
217+
-- [SPARK-28428] Spark `exclude` always expecting `()`
218+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others),
219+
-- unique1, four
220+
-- FROM tenk1 WHERE unique1 < 10;
221+
222+
-- [SPARK-28428] Spark `exclude` always expecting `()`
223+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row),
224+
-- unique1, four
225+
-- FROM tenk1 WHERE unique1 < 10;
226+
227+
-- [SPARK-28428] Spark `exclude` always expecting `()`
228+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group),
229+
-- unique1, four
230+
-- FROM tenk1 WHERE unique1 < 10;
231+
232+
-- [SPARK-28428] Spark `exclude` always expecting `()`
233+
-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties),
234+
-- unique1, four
235+
-- FROM tenk1 WHERE unique1 < 10;
236+
237+
-- [SPARK-28428] Spark `exclude` always expecting `()`
238+
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude current row),
239+
-- unique1, four
240+
-- FROM tenk1 WHERE unique1 < 10;
241+
242+
-- [SPARK-28428] Spark `exclude` always expecting `()`
243+
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude group),
244+
-- unique1, four
245+
-- FROM tenk1 WHERE unique1 < 10;
246+
247+
-- [SPARK-28428] Spark `exclude` always expecting `()`
248+
-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude ties),
249+
-- unique1, four
250+
-- FROM tenk1 WHERE unique1 < 10;
251+
252+
-- [SPARK-28428] Spark `exclude` always expecting `()`
253+
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude current row),
254+
-- unique1, four
255+
-- FROM tenk1 WHERE unique1 < 10;
256+
257+
-- [SPARK-28428] Spark `exclude` always expecting `()`
258+
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude group),
259+
-- unique1, four
260+
-- FROM tenk1 WHERE unique1 < 10;
261+
262+
-- [SPARK-28428] Spark `exclude` always expecting `()`
263+
-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude ties),
264+
-- unique1, four
265+
-- FROM tenk1 WHERE unique1 < 10;
266+
267+
-- ROWS frame that ends before the current row: two rows back through one row back.
-- NOTE(review): none of the three windows in this group has an ORDER BY, so the
-- per-row sums presumably depend on input row order -- confirm.
SELECT sum(unique1) over (rows between 2 preceding and 1 preceding),
268+
unique1, four
269+
FROM tenk1 WHERE unique1 < 10;
270+
271+
-- ROWS frame that starts after the current row: one row ahead through three rows ahead.
SELECT sum(unique1) over (rows between 1 following and 3 following),
272+
unique1, four
273+
FROM tenk1 WHERE unique1 < 10;
274+
275+
-- ROWS frame from unbounded preceding through one row after the current row.
SELECT sum(unique1) over (rows between unbounded preceding and 1 following),
276+
unique1, four
277+
FROM tenk1 WHERE unique1 < 10;
278+
279+
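-- [SPARK-28428] Spark `exclude` always expecting `()`
-- NOTE(review): the disabled query below has no `exclude` clause; it reuses the
-- named window `w` inside the OVER specification, so this JIRA reference may be
-- the wrong one -- confirm.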
280+
-- SELECT sum(unique1) over (w range between current row and unbounded following),
281+
-- unique1, four
282+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
283+
284+
-- [SPARK-28428] Spark `exclude` always expecting `()`
285+
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row),
286+
-- unique1, four
287+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
288+
289+
-- [SPARK-28428] Spark `exclude` always expecting `()`
290+
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude group),
291+
-- unique1, four
292+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
293+
294+
-- [SPARK-28428] Spark `exclude` always expecting `()`
295+
-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude ties),
296+
-- unique1, four
297+
-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four);
298+
299+
-- [SPARK-27951] ANSI SQL: NTH_VALUE function
300+
-- SELECT first_value(unique1) over w,
301+
-- nth_value(unique1, 2) over w AS nth_2,
302+
-- last_value(unique1) over w, unique1, four
303+
-- FROM tenk1 WHERE unique1 < 10
304+
-- WINDOW w AS (order by four range between current row and unbounded following);
305+
306+
-- [SPARK-28501] Frame bound value must be a literal.
307+
-- SELECT sum(unique1) over
308+
-- (order by unique1
309+
-- rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
310+
-- unique1
311+
-- FROM tenk1 WHERE unique1 < 10;
312+
313+
-- v_window: for each id produced by range(1, 11), sum_rows is the sum of id
-- over a ROWS frame spanning one row before through one row after the current
-- row, with rows ordered by id.
CREATE TEMP VIEW v_window AS
314+
SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following) as sum_rows
315+
FROM range(1, 11) i;
316+
317+
-- Read the view back so the framed sums appear in the test output.
SELECT * FROM v_window;
318+
319+
-- [SPARK-28428] Spark `exclude` always expecting `()`
320+
-- CREATE OR REPLACE TEMP VIEW v_window AS
321+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
321+
-- exclude current row) as sum_rows FROM range(1, 11) i;
323+
324+
-- SELECT * FROM v_window;
325+
326+
-- [SPARK-28428] Spark `exclude` always expecting `()`
327+
-- CREATE OR REPLACE TEMP VIEW v_window AS
328+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
329+
-- exclude group) as sum_rows FROM range(1, 11) i;
330+
-- SELECT * FROM v_window;
331+
332+
-- [SPARK-28428] Spark `exclude` always expecting `()`
333+
-- CREATE OR REPLACE TEMP VIEW v_window AS
334+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
335+
-- exclude ties) as sum_rows FROM range(1, 11) i;
336+
337+
-- [SPARK-28428] Spark `exclude` always expecting `()`
338+
-- CREATE OR REPLACE TEMP VIEW v_window AS
339+
-- SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following
340+
-- exclude no others) as sum_rows FROM range(1, 11) i;
341+
-- SELECT * FROM v_window;
342+
343+
-- [SPARK-28648] Adds support to `groups` unit type in window clauses
344+
-- CREATE OR REPLACE TEMP VIEW v_window AS
345+
-- SELECT i.id, sum(i.id) over (order by i.id groups between 1 preceding and 1 following) as sum_rows FROM range(1, 11) i;
346+
-- SELECT * FROM v_window;
347+
348+
-- Teardown: drop the temporary views created earlier in this script
-- (v_window, tenk2, int4_tbl).
DROP VIEW v_window;
349+
-- [SPARK-29540] Thrift in some cases can't parse string to date
350+
-- The CREATE TABLE for empsalary is commented out above, so its DROP stays disabled too.
-- DROP TABLE empsalary;
351+
DROP VIEW tenk2;
352+
DROP VIEW int4_tbl;

0 commit comments

Comments
 (0)