Skip to content

Commit a80ac3f

Browse files
authored
refactor: fix several small presubmit test failures on the SQLGlot compiler (#2406)
This change fixes several presubmit test failures in #2248 Fixes internal issue 417774347 🦕
1 parent 5ac6810 commit a80ac3f

File tree

9 files changed

+100
-133
lines changed

9 files changed

+100
-133
lines changed

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ def _(
527527
else:
528528
result = apply_window_if_present(result, window)
529529

530-
if op.should_floor_result:
530+
if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE:
531531
result = sge.Cast(this=sge.func("FLOOR", result), to="INT64")
532532
return result
533533

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,16 @@ def compile_concat(node: nodes.ConcatNode, *children: ir.SQLGlotIR) -> ir.SQLGlo
266266
assert len(children) >= 1
267267
uid_gen = children[0].uid_gen
268268

269-
output_ids = [id.sql for id in node.output_ids]
269+
# BigQuery `UNION` query takes the column names from the first `SELECT` clause.
270+
default_output_ids = [field.id.sql for field in node.child_nodes[0].fields]
271+
output_aliases = [
272+
(default_output_id, output_id.sql)
273+
for default_output_id, output_id in zip(default_output_ids, node.output_ids)
274+
]
275+
270276
return ir.SQLGlotIR.from_union(
271277
[child.expr for child in children],
272-
output_ids=output_ids,
278+
output_aliases=output_aliases,
273279
uid_gen=uid_gen,
274280
)
275281

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
252252
sg_expr = expr.expr
253253

254254
if from_type == dtypes.STRING_DTYPE:
255-
func_name = "PARSE_JSON_IN_SAFE" if op.safe else "PARSE_JSON"
255+
func_name = "SAFE.PARSE_JSON" if op.safe else "PARSE_JSON"
256256
return sge.func(func_name, sg_expr)
257257
if from_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE, dtypes.FLOAT_DTYPE):
258258
sg_expr = sge.Cast(this=sg_expr, to="STRING")

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,9 @@ def from_query_string(
170170
cls,
171171
query_string: str,
172172
) -> SQLGlotIR:
173-
"""Builds a SQLGlot expression from a query string"""
173+
"""Builds a SQLGlot expression from a query string. Wrapping the query
174+
in a CTE can avoid the query parsing issue for unsupported syntax in
175+
SQLGlot."""
174176
uid_gen: guid.SequentialUIDGenerator = guid.SequentialUIDGenerator()
175177
cte_name = sge.to_identifier(
176178
next(uid_gen.get_uid_stream("bfcte_")), quoted=cls.quoted
@@ -187,7 +189,7 @@ def from_query_string(
187189
def from_union(
188190
cls,
189191
selects: typing.Sequence[sge.Select],
190-
output_ids: typing.Sequence[str],
192+
output_aliases: typing.Sequence[typing.Tuple[str, str]],
191193
uid_gen: guid.SequentialUIDGenerator,
192194
) -> SQLGlotIR:
193195
"""Builds a SQLGlot expression by unioning of multiple select expressions."""
@@ -196,7 +198,7 @@ def from_union(
196198
), f"At least two select expressions must be provided, but got {selects}."
197199

198200
existing_ctes: list[sge.CTE] = []
199-
union_selects: list[sge.Expression] = []
201+
union_selects: list[sge.Select] = []
200202
for select in selects:
201203
assert isinstance(
202204
select, sge.Select
@@ -205,37 +207,27 @@ def from_union(
205207
select_expr = select.copy()
206208
select_expr, select_ctes = _pop_query_ctes(select_expr)
207209
existing_ctes = [*existing_ctes, *select_ctes]
208-
209-
new_cte_name = sge.to_identifier(
210-
next(uid_gen.get_uid_stream("bfcte_")), quoted=cls.quoted
211-
)
212-
new_cte = sge.CTE(
213-
this=select_expr,
214-
alias=new_cte_name,
210+
union_selects.append(select_expr)
211+
212+
union_expr: sge.Query = union_selects[0].subquery()
213+
for select in union_selects[1:]:
214+
union_expr = sge.Union(
215+
this=union_expr,
216+
expression=select.subquery(),
217+
distinct=False,
218+
copy=False,
215219
)
216-
existing_ctes = [*existing_ctes, new_cte]
217220

218-
selections = [
219-
sge.Alias(
220-
this=sge.to_identifier(expr.alias_or_name, quoted=cls.quoted),
221-
alias=sge.to_identifier(output_id, quoted=cls.quoted),
222-
)
223-
for expr, output_id in zip(select_expr.expressions, output_ids)
224-
]
225-
union_selects.append(
226-
sge.Select().select(*selections).from_(sge.Table(this=new_cte_name))
221+
selections = [
222+
sge.Alias(
223+
this=sge.to_identifier(old_name, quoted=cls.quoted),
224+
alias=sge.to_identifier(new_name, quoted=cls.quoted),
227225
)
228-
229-
union_expr = typing.cast(
230-
sge.Select,
231-
functools.reduce(
232-
lambda x, y: sge.Union(
233-
this=x, expression=y, distinct=False, copy=False
234-
),
235-
union_selects,
236-
),
226+
for old_name, new_name in output_aliases
227+
]
228+
final_select_expr = (
229+
sge.Select().select(*selections).from_(union_expr.subquery())
237230
)
238-
final_select_expr = sge.Select().select(sge.Star()).from_(union_expr.subquery())
239231
final_select_expr = _set_query_ctes(final_select_expr, existing_ctes)
240232
return cls(expr=final_select_expr, uid_gen=uid_gen)
241233

tests/system/small/test_session.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ def test_read_gbq_w_primary_keys_table(
352352
pd.testing.assert_frame_equal(result, sorted_result)
353353

354354
# Verify that we're working from a snapshot rather than a copy of the table.
355-
assert "FOR SYSTEM_TIME AS OF TIMESTAMP" in df.sql
355+
assert "FOR SYSTEM_TIME AS OF" in df.sql
356356

357357

358358
def test_read_gbq_w_primary_keys_table_and_filters(

tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_astype_json/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ WITH `bfcte_0` AS (
1313
PARSE_JSON(CAST(`bool_col` AS STRING)) AS `bfcol_6`,
1414
PARSE_JSON(`string_col`) AS `bfcol_7`,
1515
PARSE_JSON(CAST(`bool_col` AS STRING)) AS `bfcol_8`,
16-
PARSE_JSON_IN_SAFE(`string_col`) AS `bfcol_9`
16+
SAFE.PARSE_JSON(`string_col`) AS `bfcol_9`
1717
FROM `bfcte_0`
1818
)
1919
SELECT

tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,6 @@ WITH `bfcte_1` AS (
1414
*,
1515
0 AS `bfcol_8`
1616
FROM `bfcte_3`
17-
), `bfcte_6` AS (
18-
SELECT
19-
`rowindex` AS `bfcol_9`,
20-
`rowindex` AS `bfcol_10`,
21-
`int64_col` AS `bfcol_11`,
22-
`string_col` AS `bfcol_12`,
23-
`bfcol_8` AS `bfcol_13`,
24-
`bfcol_7` AS `bfcol_14`
25-
FROM `bfcte_5`
2617
), `bfcte_0` AS (
2718
SELECT
2819
`int64_col`,
@@ -39,44 +30,44 @@ WITH `bfcte_1` AS (
3930
*,
4031
1 AS `bfcol_23`
4132
FROM `bfcte_2`
42-
), `bfcte_7` AS (
43-
SELECT
44-
`rowindex` AS `bfcol_24`,
45-
`rowindex` AS `bfcol_25`,
46-
`int64_col` AS `bfcol_26`,
47-
`string_col` AS `bfcol_27`,
48-
`bfcol_23` AS `bfcol_28`,
49-
`bfcol_22` AS `bfcol_29`
50-
FROM `bfcte_4`
51-
), `bfcte_8` AS (
33+
), `bfcte_6` AS (
5234
SELECT
53-
*
35+
`bfcol_9` AS `bfcol_30`,
36+
`bfcol_10` AS `bfcol_31`,
37+
`bfcol_11` AS `bfcol_32`,
38+
`bfcol_12` AS `bfcol_33`,
39+
`bfcol_13` AS `bfcol_34`,
40+
`bfcol_14` AS `bfcol_35`
5441
FROM (
55-
SELECT
56-
`bfcol_9` AS `bfcol_30`,
57-
`bfcol_10` AS `bfcol_31`,
58-
`bfcol_11` AS `bfcol_32`,
59-
`bfcol_12` AS `bfcol_33`,
60-
`bfcol_13` AS `bfcol_34`,
61-
`bfcol_14` AS `bfcol_35`
62-
FROM `bfcte_6`
42+
(
43+
SELECT
44+
`rowindex` AS `bfcol_9`,
45+
`rowindex` AS `bfcol_10`,
46+
`int64_col` AS `bfcol_11`,
47+
`string_col` AS `bfcol_12`,
48+
`bfcol_8` AS `bfcol_13`,
49+
`bfcol_7` AS `bfcol_14`
50+
FROM `bfcte_5`
51+
)
6352
UNION ALL
64-
SELECT
65-
`bfcol_24` AS `bfcol_30`,
66-
`bfcol_25` AS `bfcol_31`,
67-
`bfcol_26` AS `bfcol_32`,
68-
`bfcol_27` AS `bfcol_33`,
69-
`bfcol_28` AS `bfcol_34`,
70-
`bfcol_29` AS `bfcol_35`
71-
FROM `bfcte_7`
53+
(
54+
SELECT
55+
`rowindex` AS `bfcol_24`,
56+
`rowindex` AS `bfcol_25`,
57+
`int64_col` AS `bfcol_26`,
58+
`string_col` AS `bfcol_27`,
59+
`bfcol_23` AS `bfcol_28`,
60+
`bfcol_22` AS `bfcol_29`
61+
FROM `bfcte_4`
62+
)
7263
)
7364
)
7465
SELECT
7566
`bfcol_30` AS `rowindex`,
7667
`bfcol_31` AS `rowindex_1`,
7768
`bfcol_32` AS `int64_col`,
7869
`bfcol_33` AS `string_col`
79-
FROM `bfcte_8`
70+
FROM `bfcte_6`
8071
ORDER BY
8172
`bfcol_34` ASC NULLS LAST,
8273
`bfcol_35` ASC NULLS LAST

tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql

Lines changed: 38 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,6 @@ WITH `bfcte_2` AS (
1313
*,
1414
0 AS `bfcol_5`
1515
FROM `bfcte_6`
16-
), `bfcte_13` AS (
17-
SELECT
18-
`float64_col` AS `bfcol_6`,
19-
`int64_col` AS `bfcol_7`,
20-
`bfcol_5` AS `bfcol_8`,
21-
`bfcol_4` AS `bfcol_9`
22-
FROM `bfcte_10`
2316
), `bfcte_0` AS (
2417
SELECT
2518
`bool_col`,
@@ -42,13 +35,6 @@ WITH `bfcte_2` AS (
4235
*,
4336
1 AS `bfcol_16`
4437
FROM `bfcte_8`
45-
), `bfcte_14` AS (
46-
SELECT
47-
`float64_col` AS `bfcol_17`,
48-
`int64_too` AS `bfcol_18`,
49-
`bfcol_16` AS `bfcol_19`,
50-
`bfcol_15` AS `bfcol_20`
51-
FROM `bfcte_12`
5238
), `bfcte_1` AS (
5339
SELECT
5440
`float64_col`,
@@ -64,13 +50,6 @@ WITH `bfcte_2` AS (
6450
*,
6551
2 AS `bfcol_26`
6652
FROM `bfcte_5`
67-
), `bfcte_15` AS (
68-
SELECT
69-
`float64_col` AS `bfcol_27`,
70-
`int64_col` AS `bfcol_28`,
71-
`bfcol_26` AS `bfcol_29`,
72-
`bfcol_25` AS `bfcol_30`
73-
FROM `bfcte_9`
7453
), `bfcte_0` AS (
7554
SELECT
7655
`bool_col`,
@@ -93,50 +72,54 @@ WITH `bfcte_2` AS (
9372
*,
9473
3 AS `bfcol_37`
9574
FROM `bfcte_7`
96-
), `bfcte_16` AS (
97-
SELECT
98-
`float64_col` AS `bfcol_38`,
99-
`int64_too` AS `bfcol_39`,
100-
`bfcol_37` AS `bfcol_40`,
101-
`bfcol_36` AS `bfcol_41`
102-
FROM `bfcte_11`
103-
), `bfcte_17` AS (
75+
), `bfcte_13` AS (
10476
SELECT
105-
*
77+
`bfcol_6` AS `bfcol_42`,
78+
`bfcol_7` AS `bfcol_43`,
79+
`bfcol_8` AS `bfcol_44`,
80+
`bfcol_9` AS `bfcol_45`
10681
FROM (
107-
SELECT
108-
`bfcol_6` AS `bfcol_42`,
109-
`bfcol_7` AS `bfcol_43`,
110-
`bfcol_8` AS `bfcol_44`,
111-
`bfcol_9` AS `bfcol_45`
112-
FROM `bfcte_13`
82+
(
83+
SELECT
84+
`float64_col` AS `bfcol_6`,
85+
`int64_col` AS `bfcol_7`,
86+
`bfcol_5` AS `bfcol_8`,
87+
`bfcol_4` AS `bfcol_9`
88+
FROM `bfcte_10`
89+
)
11390
UNION ALL
114-
SELECT
115-
`bfcol_17` AS `bfcol_42`,
116-
`bfcol_18` AS `bfcol_43`,
117-
`bfcol_19` AS `bfcol_44`,
118-
`bfcol_20` AS `bfcol_45`
119-
FROM `bfcte_14`
91+
(
92+
SELECT
93+
`float64_col` AS `bfcol_17`,
94+
`int64_too` AS `bfcol_18`,
95+
`bfcol_16` AS `bfcol_19`,
96+
`bfcol_15` AS `bfcol_20`
97+
FROM `bfcte_12`
98+
)
12099
UNION ALL
121-
SELECT
122-
`bfcol_27` AS `bfcol_42`,
123-
`bfcol_28` AS `bfcol_43`,
124-
`bfcol_29` AS `bfcol_44`,
125-
`bfcol_30` AS `bfcol_45`
126-
FROM `bfcte_15`
100+
(
101+
SELECT
102+
`float64_col` AS `bfcol_27`,
103+
`int64_col` AS `bfcol_28`,
104+
`bfcol_26` AS `bfcol_29`,
105+
`bfcol_25` AS `bfcol_30`
106+
FROM `bfcte_9`
107+
)
127108
UNION ALL
128-
SELECT
129-
`bfcol_38` AS `bfcol_42`,
130-
`bfcol_39` AS `bfcol_43`,
131-
`bfcol_40` AS `bfcol_44`,
132-
`bfcol_41` AS `bfcol_45`
133-
FROM `bfcte_16`
109+
(
110+
SELECT
111+
`float64_col` AS `bfcol_38`,
112+
`int64_too` AS `bfcol_39`,
113+
`bfcol_37` AS `bfcol_40`,
114+
`bfcol_36` AS `bfcol_41`
115+
FROM `bfcte_11`
116+
)
134117
)
135118
)
136119
SELECT
137120
`bfcol_42` AS `float64_col`,
138121
`bfcol_43` AS `int64_col`
139-
FROM `bfcte_17`
122+
FROM `bfcte_13`
140123
ORDER BY
141124
`bfcol_44` ASC NULLS LAST,
142125
`bfcol_45` ASC NULLS LAST

tests/unit/core/compile/sqlglot/test_compile_readlocal.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import sys
16-
1715
import numpy as np
1816
import pandas as pd
1917
import pytest
@@ -36,7 +34,6 @@ def test_compile_readlocal_w_structs_df(
3634
compiler_session_w_nested_structs_types: bigframes.Session,
3735
snapshot,
3836
):
39-
# TODO(b/427306734): Check why the output is different from the expected output.
4037
bf_df = bpd.DataFrame(
4138
nested_structs_pandas_df, session=compiler_session_w_nested_structs_types
4239
)
@@ -66,8 +63,6 @@ def test_compile_readlocal_w_json_df(
6663
def test_compile_readlocal_w_special_values(
6764
compiler_session: bigframes.Session, snapshot
6865
):
69-
if sys.version_info < (3, 12):
70-
pytest.skip("Skipping test due to inconsistent SQL formatting")
7166
df = pd.DataFrame(
7267
{
7368
"col_none": [None, 1, 2],

0 commit comments

Comments
 (0)