Skip to content

Commit 879ea44

Browse files
committed
address comments
1 parent e493025 commit 879ea44

File tree

5 files changed

+39
-29
lines changed

5 files changed

+39
-29
lines changed

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,22 @@
2121
import pyarrow as pa
2222
import sqlglot.expressions as sge
2323

24-
from bigframes.core import expression, guid, nodes, rewrite
24+
from bigframes.core import expression, guid, identifiers, nodes, rewrite
2525
from bigframes.core.compile import configs
2626
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2727
import bigframes.core.compile.sqlglot.sqlglot_ir as ir
2828
import bigframes.core.ordering as bf_ordering
2929

3030

31-
@dataclasses.dataclass(frozen=True)
3231
class SQLGlotCompiler:
3332
"""Compiles BigFrame nodes into SQL using SQLGlot."""
3433

3534
uid_gen: guid.SequentialUIDGenerator
3635
"""Generator for unique identifiers."""
3736

37+
def __init__(self):
38+
self.uid_gen = guid.SequentialUIDGenerator()
39+
3840
def compile(
3941
self,
4042
node: nodes.BigFrameNode,
@@ -84,8 +86,8 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
8486
result_node = typing.cast(
8587
nodes.ResultNode, rewrite.column_pruning(result_node)
8688
)
87-
remap_node, _ = rewrite.remap_variables(result_node, self.uid_gen)
88-
sql = self._compile_result_node(typing.cast(nodes.ResultNode, remap_node))
89+
result_node = self._remap_variables(result_node)
90+
sql = self._compile_result_node(result_node)
8991
return configs.CompileResult(
9092
sql, result_node.schema.to_bigquery(), result_node.order_by
9193
)
@@ -94,8 +96,8 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
9496
result_node = dataclasses.replace(result_node, order_by=None)
9597
result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node))
9698

97-
remap_node, _ = rewrite.remap_variables(result_node, self.uid_gen)
98-
sql = self._compile_result_node(typing.cast(nodes.ResultNode, remap_node))
99+
result_node = self._remap_variables(result_node)
100+
sql = self._compile_result_node(result_node)
99101
# Return the ordering iff no extra columns are needed to define the row order
100102
if ordering is not None:
101103
output_order = (
@@ -108,6 +110,14 @@ def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult
108110
sql, result_node.schema.to_bigquery(), output_order
109111
)
110112

113+
def _remap_variables(self, node: nodes.ResultNode) -> nodes.ResultNode:
114+
"""Remaps `ColumnId`s in the BFET of a `ResultNode` to produce deterministic UIDs."""
115+
116+
result_node, _ = rewrite.remap_variables(
117+
node, map(identifiers.ColumnId, self.uid_gen.get_uid_stream("bfcol_"))
118+
)
119+
return typing.cast(nodes.ResultNode, result_node)
120+
111121
def _compile_result_node(self, root: nodes.ResultNode) -> str:
112122
sqlglot_ir = self.compile_node(root.child)
113123
# TODO: add order_by, limit, and selections to sqlglot_expr

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def _encapsulate_as_cte(
128128

129129
existing_ctes = select_expr.args.pop("with", [])
130130
new_cte_name = sge.to_identifier(
131-
self.uid_gen.generate_sequential_uid("bfcte_"), quoted=self.quoted
131+
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
132132
)
133133
new_cte = sge.CTE(
134134
this=select_expr,

bigframes/core/guid.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import typing
16+
1517
_GUID_COUNTER = 0
1618

1719

@@ -22,18 +24,17 @@ def generate_guid(prefix="col_"):
2224

2325

2426
class SequentialUIDGenerator:
25-
"""
26-
Generates sequential-like UIDs with multiple prefixes, e.g., "t0", "t1", "c0", "t2", etc.
27-
"""
27+
"""Yields a continuous stream of raw UID strings for the given prefix."""
2828

2929
def __init__(self):
30-
self.prefix_counters = {}
30+
self.prefix_counters: typing.Dict[str, int] = {}
3131

32-
def generate_sequential_uid(self, prefix: str) -> str:
33-
"""Generates a sequential UID with specified prefix."""
32+
def get_uid_stream(self, prefix: str) -> typing.Generator[str, None, None]:
33+
"""Returns next sequential UID with specified prefix."""
3434
if prefix not in self.prefix_counters:
3535
self.prefix_counters[prefix] = 0
3636

37-
uid = f"{prefix}{self.prefix_counters[prefix]}"
38-
self.prefix_counters[prefix] += 1
39-
return uid
37+
while True:
38+
uid = f"{prefix}{self.prefix_counters[prefix]}"
39+
self.prefix_counters[prefix] += 1
40+
yield uid

bigframes/core/rewrite/identifiers.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,19 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16-
from typing import Tuple
16+
import typing
1717

18-
from bigframes.core import guid, identifiers, nodes
18+
from bigframes.core import identifiers, nodes
1919

2020

2121
# TODO: May as well just outright remove selection nodes in this process.
2222
def remap_variables(
2323
root: nodes.BigFrameNode,
24-
uid_gen: guid.SequentialUIDGenerator,
25-
) -> Tuple[nodes.BigFrameNode, dict[identifiers.ColumnId, identifiers.ColumnId],]:
24+
id_generator: typing.Iterator[identifiers.ColumnId],
25+
) -> typing.Tuple[
26+
nodes.BigFrameNode,
27+
dict[identifiers.ColumnId, identifiers.ColumnId],
28+
]:
2629
"""Remaps `ColumnId`s in the BFET to produce deterministic and sequential UIDs.
2730
2831
Note: this will convert a DAG to a tree.
@@ -31,7 +34,7 @@ def remap_variables(
3134
ref_mapping = dict()
3235
# Sequential ids are assigned bottom-up left-to-right
3336
for child in root.child_nodes:
34-
new_child, child_var_mapping = remap_variables(child, uid_gen=uid_gen)
37+
new_child, child_var_mapping = remap_variables(child, id_generator=id_generator)
3538
child_replacement_map[child] = new_child
3639
ref_mapping.update(child_var_mapping)
3740

@@ -42,10 +45,7 @@ def remap_variables(
4245

4346
with_new_refs = with_new_children.remap_refs(ref_mapping)
4447

45-
node_var_mapping = {
46-
old_id: identifiers.ColumnId(name=uid_gen.generate_sequential_uid("bfcol_"))
47-
for old_id in root.node_defined_ids
48-
}
48+
node_var_mapping = {old_id: next(id_generator) for old_id in root.node_defined_ids}
4949
with_new_vars = with_new_refs.remap_vars(node_var_mapping)
5050
with_new_vars._validate()
5151

tests/unit/core/compile/sqlglot/compiler_session.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
import bigframes.core
2020
import bigframes.core.compile.sqlglot as sqlglot
21-
import bigframes.core.guid
2221
import bigframes.dataframe
2322
import bigframes.session.executor
2423
import bigframes.session.metrics
@@ -42,9 +41,9 @@ def to_sql(
4241

4342
# Compared with BigQueryCachingExecutor, SQLCompilerExecutor skips
4443
# caching the subtree.
45-
return self.compiler.SQLGlotCompiler(
46-
uid_gen=bigframes.core.guid.SequentialUIDGenerator()
47-
).compile(array_value.node, ordered=ordered)
44+
return self.compiler.SQLGlotCompiler().compile(
45+
array_value.node, ordered=ordered
46+
)
4847

4948

5049
class SQLCompilerSession(bigframes.session.Session):

0 commit comments

Comments
 (0)