-
Notifications
You must be signed in to change notification settings - Fork 180
Open
Labels
Description
This is needed for better ergonomics, both for users and for internal use.
Backend support
- dask.dataframe.DataFrame.astype
- ibis.Table.cast
- pandas.DataFrame.astype
- polars.LazyFrame.cast
- pyarrow.Table.cast
- pyspark.sql.DataFrame.to
Related
- feat: Add `concat(..., how="*_relaxed")` #3398 (comment)
- feat: add `DataFrame|LazyFrame.cast` method #1045
Originally posted by @dangotbanned in #3398 (comment)
What's here could be both more efficient and shorter:
narwhals/narwhals/_arrow/namespace.py
Lines 180 to 194 in 8fabb13
| def _concat_diagonal_relaxed(self, dfs: Sequence[pa.Table], /) -> pa.Table: | |
| native_schemas = tuple(table.schema for table in dfs) | |
| out_schema = reduce( | |
| lambda x, y: to_supertype(*combine_schemas(x, y)), | |
| (Schema.from_arrow(pa_schema) for pa_schema in native_schemas), | |
| ).to_arrow() | |
| to_schemas = ( | |
| pa.schema([out_schema.field(name) for name in native_schema.names]) | |
| for native_schema in native_schemas | |
| ) | |
| to_concat = tuple( | |
| table.cast(to_schema) for table, to_schema in zip(dfs, to_schemas) | |
| ) | |
| return self._concat_diagonal(to_concat) | |
Show ArrowDataFrame.cast diff
+++ b/narwhals/_arrow/dataframe.py
+ def cast(self, schema: IntoSchema | Iterable[tuple[str, DType]]) -> Self:
+ return self._with_native(
+ self.native.cast(Schema(schema).to_arrow()), validate_column_names=False
+ )
+
+++ b/narwhals/_arrow/namespace.py
-from narwhals._arrow.utils import cast_to_comparable_string_types
+from narwhals._arrow.utils import cast_to_comparable_string_types, concat_tables
-from narwhals.schema import Schema, combine_schemas, to_supertype
+from narwhals.schema import Schema, to_supertype, to_supertype_diagonal

With those changes, we can use generators to avoid materializing as much all at once:
def concat_diagonal_relaxed(self, dfs: Sequence[ArrowDataFrame], /) -> ArrowDataFrame:
schemas = [Schema(df.schema) for df in dfs]
supertypes = reduce(to_supertype_diagonal, schemas)
to_concat = (
df.cast((name, supertypes[name]) for name in schema).native
for df, schema in zip(dfs, schemas)
)
return dfs[0]._with_native(concat_tables(to_concat))

Reactions are currently unavailable