Skip to content

NotImplementedError when trying to run "Why PyDough" example #475

@MarcoGorelli

Description

@MarcoGorelli

I'm trying to run an example like the one listed in https://github.com/bodo-ai/PyDough?tab=readme-ov-file#what-is-pydough and have written the following:

import pydough
%load_ext pydough.jupyter_extensions

pydough.active_session.load_metadata_graph("tpch_demo_graph.json", "TPCH")

%%pydough

result = customers
print(pydough.to_sql(result))

This prints

SELECT
  c_custkey AS key,
  c_name AS name,
  c_address AS address,
  c_nationkey AS nation_key,
  c_phone AS phone,
  c_acctbal AS account_balance,
  c_mktsegment AS market_segment,
  c_comment AS comment
FROM main.customer

I then run

%%pydough

result = customers.CALCULATE(name, x=SUM(account_balance))
print(pydough.to_sql(result))

and get

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[12], line 4
      2 _ROOT = UnqualifiedRoot(pydough.active_session.metadata)
      3 result = _ROOT.customers.CALCULATE(_ROOT.name, x=_ROOT.SUM(_ROOT.account_balance))
----> 4 print(pydough.to_sql(result))

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/evaluation/evaluate_unqualified.py:166, in to_sql(node, **kwargs)
    164 if not isinstance(qualified, PyDoughCollectionQDAG):
    165     raise pydough.active_session.error_builder.expected_collection(qualified)
--> 166 relational: RelationalRoot = convert_ast_to_relational(
    167     qualified, column_selection, session
    168 )
    169 return convert_relation_to_sql(relational, session, max_rows)

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/relational_converter.py:1721, in convert_ast_to_relational(node, columns, session)
   1718 # Convert the QDAG node to a hybrid tree, including any necessary
   1719 # transformations such as de-correlation.
   1720 hybrid_translator: HybridTranslator = HybridTranslator(session)
-> 1721 hybrid: HybridTree = hybrid_translator.convert_qdag_to_hybrid(node)
   1723 # Then, invoke relational conversion procedure. The first element in the
   1724 # returned list is the final relational tree.
   1725 output: TranslationOutput = rel_translator.rel_translation(
   1726     hybrid, len(hybrid.pipeline) - 1
   1727 )

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/hybrid_translator.py:1696, in HybridTranslator.convert_qdag_to_hybrid(self, node)
   1684 """
   1685 Convert a PyDough QDAG node to a hybrid tree, including any necessary
   1686 transformations such as de-correlation.
   (...)   1693     transformations.
   1694 """
   1695 # 1. Run the initial conversion from QDAG to Hybrid
-> 1696 hybrid: HybridTree = self.make_hybrid_tree(node, None)
   1697 # 2. Eject any aggregate inputs from the hybrid tree.
   1698 self.eject_aggregate_inputs(hybrid)

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/hybrid_translator.py:1446, in HybridTranslator.make_hybrid_tree(self, node, parent, is_aggregate)
   1444     return successor_hybrid
   1445 case Calculate():
-> 1446     hybrid = self.make_hybrid_tree(
   1447         node.preceding_context, parent, is_aggregate
   1448     )
   1449     self.populate_children(hybrid, node, child_ref_mapping)
   1450     new_expressions: dict[str, HybridExpr] = {}

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/hybrid_translator.py:1452, in HybridTranslator.make_hybrid_tree(self, node, parent, is_aggregate)
   1450 new_expressions: dict[str, HybridExpr] = {}
   1451 for name in sorted(node.calc_terms):
-> 1452     expr = self.make_hybrid_expr(
   1453         hybrid, node.get_expr(name), child_ref_mapping, False
   1454     )
   1455     new_expressions[name] = expr
   1456 hybrid.add_operation(
   1457     HybridCalculate(
   1458         hybrid.pipeline[-1],
   (...)   1461     )
   1462 )

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/hybrid_translator.py:1152, in HybridTranslator.make_hybrid_expr(self, hybrid, expr, child_ref_mapping, inside_agg)
   1143     args.append(
   1144         self.make_hybrid_expr(
   1145             hybrid,
   (...)   1149         )
   1150     )
   1151 if expr.operator.is_aggregation:
-> 1152     return self.make_agg_call(hybrid, expr, args)
   1153 else:
   1154     return HybridFunctionExpr(expr.operator, args, expr.pydough_type)

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/hybrid_translator.py:677, in HybridTranslator.make_agg_call(self, hybrid, expr, args)
    659 """
    660 For aggregate function calls, their arguments are translated in a
    661 manner that identifies what child subtree they correspond too, by
   (...)    673     `args`: the converted arguments to the aggregation call.
    674 """
    675 child_indices: set[int] = set()
    676 converted_args: list[HybridExpr] = [
--> 677     self.convert_agg_arg(arg, child_indices) for arg in args
    678 ]
    679 if len(child_indices) != 1:
    680     raise ValueError(
    681         f"Expected aggregation call to contain references to exactly one child collection, but found {len(child_indices)} in {expr}"
    682     )

File ~/scratch/.venv/lib/python3.12/site-packages/pydough/conversion/hybrid_translator.py:641, in HybridTranslator.convert_agg_arg(self, expr, child_indices)
    637     raise NotImplementedError(
    638         "PyDough does yet support aggregations whose arguments mix between subcollection data of the current context and fields of an ancestor of the current context"
    639     )
    640 case HybridRefExpr():
--> 641     raise NotImplementedError(
    642         "PyDough does yet support aggregations whose arguments mix between subcollection data of the current context and fields of the context itself"
    643     )
    644 case HybridWindowExpr():
    645     raise NotImplementedError(
    646         "PyDough does yet support aggregations whose arguments mix between subcollection data of the current context and window functions"
    647     )

NotImplementedError: PyDough does yet support aggregations whose arguments mix between subcollection data of the current context and fields of the context itself

Given that this is very similar to the README example, I was expecting it to work.

Could you please help me figure out what I might be doing wrong?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions