Skip to content

Commit

Permalink
refactor(joins): require explicit abstract table as RHS of joins (#9661)
Browse files Browse the repository at this point in the history
## Description of changes

We previously had limited support for passing in-memory objects as the
RHS of a join, in which case we would create a memtable for the user and
then use that. For backends where memtable creation is expensive, or for
queries where there may be multiple calls to the same in-memory data, it
is better to be explicit and first register the in-memory data with the
backend using either `memtable` or `create_table`.

BREAKING CHANGE: Passing a `pyarrow.Table` or a `pandas.DataFrame` as
the right-hand-side of a join is no longer supported.

To join against in-memory data, you can pass the in-memory object to
`ibis.memtable` or `con.create_table` and use the resulting table object
instead.


## Issues closed

* Resolves #9571
  • Loading branch information
gforsyth authored Sep 23, 2024
1 parent 10b38ee commit 8166717
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 27 deletions.
7 changes: 5 additions & 2 deletions ibis/backends/tests/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ def test_join_with_pandas(batting, awards_players):
batting_filt = batting.filter(lambda t: t.yearID < 1900)
awards_players_filt = awards_players.filter(lambda t: t.yearID < 1900).execute()
assert isinstance(awards_players_filt, pd.DataFrame)
expr = batting_filt.join(awards_players_filt, "yearID")
t = ibis.memtable(awards_players_filt)
expr = batting_filt.join(t, "yearID")
df = expr.execute()
assert df.yearID.nunique() == 7

Expand All @@ -206,7 +207,9 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players):

assert sch.infer(awards_players_filt) == sch.Schema(dict(yearID="int"))
assert isinstance(awards_players_filt, pd.DataFrame)
expr = batting_filt.join(awards_players_filt, "yearID")

t = ibis.memtable(awards_players_filt)
expr = batting_filt.join(t, "yearID")
df = expr.execute()
assert df.yearID.nunique() == 7

Expand Down
29 changes: 5 additions & 24 deletions ibis/expr/types/joins.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

from public import public

import ibis
import ibis.expr.operations as ops
from ibis import util
from ibis.common.deferred import Deferred
from ibis.common.egraph import DisjointSet
from ibis.common.exceptions import (
ExpressionError,
IbisInputError,
IbisTypeError,
InputTypeError,
IntegrityError,
)
Expand All @@ -31,28 +31,6 @@
from ibis.expr.operations.relations import JoinKind


def coerce_to_table(data):
    """Return *data* as a table usable as the right operand of a join.

    A ``pandas.DataFrame`` or ``pyarrow.Table`` is wrapped with
    ``ibis.memtable``. Both libraries are imported lazily, and a missing
    library simply disables its check. An object that is already a
    ``Table`` is returned unchanged.

    Raises
    ------
    TypeError
        If *data* is none of the accepted kinds.
    """
    # pandas is optional: only test for DataFrame when it can be imported.
    try:
        import pandas as pd
    except ImportError:
        pd = None
    if pd is not None and isinstance(data, pd.DataFrame):
        return ibis.memtable(data)

    # pyarrow is optional as well.
    try:
        import pyarrow as pa
    except ImportError:
        pa = None
    if pa is not None and isinstance(data, pa.Table):
        return ibis.memtable(data)

    if isinstance(data, Table):
        return data
    raise TypeError(f"right operand must be a Table, got {type(data).__name__}")


def disambiguate_fields(
how,
predicates,
Expand Down Expand Up @@ -254,7 +232,10 @@ def join(
lname: str = "",
rname: str = "{name}_right",
):
right = coerce_to_table(right)
if not isinstance(right, Table):
raise IbisTypeError(
f"Right side of join must be an Ibis table, got {type(right)}."
)

if how == "left_semi":
how = "semi"
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,7 +1315,7 @@ def test_join_invalid_expr_type(con):
invalid_right = left.foo_id
join_key = ["bar_id"]

with pytest.raises(TypeError):
with pytest.raises(com.IbisTypeError):
left.inner_join(invalid_right, join_key)


Expand Down

0 comments on commit 8166717

Please sign in to comment.