Skip to content

Commit a0a6cff

Browse files
committed
Update DataFusion to 5.1.0 for the Python binding
1 parent 7932cb9 commit a0a6cff

File tree

4 files changed

+16
-11
lines changed

4 files changed

+16
-11
lines changed

python/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717

1818
[package]
19-
name = "datafusion"
19+
name = "datafusion-python"
2020
version = "0.3.0"
2121
homepage = "https://github.com/apache/arrow"
2222
repository = "https://github.com/apache/arrow"
@@ -31,7 +31,8 @@ libc = "0.2"
3131
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
3232
rand = "0.7"
3333
pyo3 = { version = "0.14.1", features = ["extension-module", "abi3", "abi3-py36"] }
34-
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4d61196dee8526998aee7e7bb10ea88422e5f9e1" }
34+
datafusion = { path = "../datafusion", version = "5.1.0" }
35+
proc-macro2 = { version = "=1.0.28" }
3536

3637
[lib]
3738
name = "datafusion"

python/src/dataframe.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,13 @@ impl DataFrame {
161161
Ok(pretty::print_batches(&batches).unwrap())
162162
}
163163

164-
165164
/// Returns the join of two DataFrames `on`.
166-
fn join(&self, right: &DataFrame, on: Vec<&str>, how: &str) -> PyResult<Self> {
165+
fn join(
166+
&self,
167+
right: &DataFrame,
168+
join_keys: (Vec<&str>, Vec<&str>),
169+
how: &str,
170+
) -> PyResult<Self> {
167171
let builder = LogicalPlanBuilder::from(self.plan.clone());
168172

169173
let join_type = match how {
@@ -182,7 +186,7 @@ impl DataFrame {
182186
}
183187
};
184188

185-
let builder = errors::wrap(builder.join(&right.plan, join_type, on.clone(), on))?;
189+
let builder = errors::wrap(builder.join(&right.plan, join_type, join_keys))?;
186190

187191
let plan = errors::wrap(builder.build())?;
188192

python/tests/test_df.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def test_join():
104104
)
105105
df1 = ctx.create_dataframe([[batch]])
106106

107-
df = df.join(df1, on="a", how="inner")
107+
df = df.join(df1, join_keys=(["a"], ["a"]), how="inner")
108108
df = df.sort([f.col("a").sort(ascending=True)])
109109
table = pa.Table.from_batches(df.collect())
110110

python/tests/test_sql.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def test_register_csv(ctx, tmp_path):
6969
for table in ["csv", "csv1", "csv2"]:
7070
result = ctx.sql(f"SELECT COUNT(int) FROM {table}").collect()
7171
result = pa.Table.from_batches(result)
72-
assert result.to_pydict() == {"COUNT(int)": [4]}
72+
assert result.to_pydict() == {f"COUNT({table}.int)": [4]}
7373

7474
result = ctx.sql("SELECT * FROM csv3").collect()
7575
result = pa.Table.from_batches(result)
@@ -88,7 +88,7 @@ def test_register_parquet(ctx, tmp_path):
8888

8989
result = ctx.sql("SELECT COUNT(a) FROM t").collect()
9090
result = pa.Table.from_batches(result)
91-
assert result.to_pydict() == {"COUNT(a)": [100]}
91+
assert result.to_pydict() == {"COUNT(t.a)": [100]}
9292

9393

9494
def test_execute(ctx, tmp_path):
@@ -123,8 +123,8 @@ def test_execute(ctx, tmp_path):
123123
result_values = []
124124
for result in results:
125125
pydict = result.to_pydict()
126-
result_keys.extend(pydict["CAST(a AS Int32)"])
127-
result_values.extend(pydict["COUNT(a)"])
126+
result_keys.extend(pydict["CAST(t.a AS Int32)"])
127+
result_values.extend(pydict["COUNT(t.a)"])
128128

129129
result_keys, result_values = (
130130
list(t) for t in zip(*sorted(zip(result_keys, result_values)))
@@ -141,7 +141,7 @@ def test_execute(ctx, tmp_path):
141141
expected_cast = pa.array([50, 50], pa.int32())
142142
expected = [
143143
pa.RecordBatch.from_arrays(
144-
[expected_a, expected_cast], ["a", "CAST(a AS Int32)"]
144+
[expected_a, expected_cast], ["a", "CAST(t.a AS Int32)"]
145145
)
146146
]
147147
np.testing.assert_equal(expected[0].column(1), expected[0].column(1))

0 commit comments

Comments (0)