Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cudf._lib.column in favor of pylibcudf. #17760

Open
wants to merge 21 commits into
base: branch-25.04
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b511f4a
Remove cudf._lib.column
mroeschke Jan 16, 2025
94a96f0
Start working through circular import errors
mroeschke Jan 16, 2025
119c0fd
Remove other access of cudf._lib.column as libcudf
mroeschke Jan 16, 2025
00d8709
Fix more tests
mroeschke Jan 16, 2025
bff0fa5
Add column_from_self_view, and fix other tests
mroeschke Jan 17, 2025
0581459
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/column
mroeschke Jan 17, 2025
1034df9
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/column
mroeschke Jan 21, 2025
cd60209
Type output of dtype_from_pylibcudf_column
mroeschke Jan 21, 2025
d578f6f
Replace column_from_self_view with existing copy
mroeschke Jan 21, 2025
cde7573
Merge branch 'branch-25.02' into cudf/_lib/column
mroeschke Jan 22, 2025
01b20bc
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/column
mroeschke Jan 24, 2025
323b1b0
Merge branch 'cudf/_lib/column' of https://github.com/mroeschke/cudf …
mroeschke Jan 24, 2025
c1ff886
Fold in some doc changes from scalar refactor
mroeschke Jan 24, 2025
1e85b3e
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/column
mroeschke Jan 24, 2025
622b4ef
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/column
mroeschke Jan 27, 2025
b3aa5cd
Adjust search_sorted output return
mroeschke Jan 27, 2025
16626f2
Remove c_value
mroeschke Jan 27, 2025
5e134e6
Merge branch 'branch-25.02' into cudf/_lib/column
galipremsagar Jan 28, 2025
05936c6
Merge remote-tracking branch 'upstream/branch-25.02' into cudf/_lib/column
mroeschke Jan 28, 2025
ec92112
Merge branch 'cudf/_lib/column' of https://github.com/mroeschke/cudf …
mroeschke Jan 28, 2025
5a9725e
Merge remote-tracking branch 'upstream/branch-25.04' into cudf/_lib/column
mroeschke Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove other access of cudf._lib.column as libcudf
  • Loading branch information
mroeschke committed Jan 16, 2025
commit 119c0fdc3a1298f21417924c974f9064cbdfd9b8
12 changes: 7 additions & 5 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import pylibcudf as plc

import cudf
from cudf.core.column import ColumnBase

if TYPE_CHECKING:
from cudf.core.column.datetime import DatetimeColumn
Expand Down Expand Up @@ -116,17 +115,20 @@ def _read_tzfile_as_columns(
plc_table = plc.io.timezone.make_timezone_transition_table(
tzdir, zone_name
)
transition_times_and_offsets = [
ColumnBase.from_pylibcudf(col) for col in plc_table.columns()
]
transition_times_and_offsets = plc_table.columns()

if not transition_times_and_offsets:
from cudf.core.column.column import as_column

# this happens for UTC-like zones
min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]")
return (as_column([min_date]), as_column([np.timedelta64(0, "s")])) # type: ignore[return-value]
return tuple(transition_times_and_offsets) # type: ignore[return-value]

from cudf.core.column import ColumnBase

return tuple(
ColumnBase.from_pylibcudf(col) for col in transition_times_and_offsets
) # type: ignore[return-value]


def check_ambiguous_and_nonexistent(
Expand Down
7 changes: 3 additions & 4 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import cudf
import cudf.core.column.column as column
from cudf import _lib as libcudf
from cudf.core._compat import PANDAS_GE_220
from cudf.core._internals import binaryop, unary
from cudf.core._internals.search import search_sorted
Expand Down Expand Up @@ -908,7 +907,7 @@ def _find_ambiguous_and_nonexistent(
ambiguous_end.to_pylibcudf(mode="read"),
plc.labeling.Inclusive.NO,
)
ambiguous = libcudf.column.Column.from_pylibcudf(plc_column)
ambiguous = ColumnBase.from_pylibcudf(plc_column)
ambiguous = ambiguous.notnull()

# At the start of a non-existent time period, Clock 2 reads less
Expand All @@ -927,10 +926,10 @@ def _find_ambiguous_and_nonexistent(
nonexistent_end.to_pylibcudf(mode="read"),
plc.labeling.Inclusive.NO,
)
nonexistent = libcudf.column.Column.from_pylibcudf(plc_column)
nonexistent = ColumnBase.from_pylibcudf(plc_column)
nonexistent = nonexistent.notnull()

return ambiguous, nonexistent
return ambiguous, nonexistent # type: ignore[return-value]

def tz_localize(
self,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4079,7 +4079,7 @@ def removesuffix(self, suffix: str) -> SeriesOrIndex:
ends_column = self.endswith(suffix)._column # type: ignore[union-attr]
removed_column = self.slice(0, -len(suffix), None)._column # type: ignore[union-attr]

result = removed_column.copy_if_else(self._column, ends_column)
result = removed_column.copy_if_else(self._column, ends_column) # type: ignore[arg-type]
return self._return_or_inplace(result)

def removeprefix(self, prefix: str) -> SeriesOrIndex:
Expand Down Expand Up @@ -4117,7 +4117,7 @@ def removeprefix(self, prefix: str) -> SeriesOrIndex:
return self._return_or_inplace(self._column)
starts_column = self.startswith(prefix)._column # type: ignore[union-attr]
removed_column = self.slice(len(prefix), None, None)._column # type: ignore[union-attr]
result = removed_column.copy_if_else(self._column, starts_column)
result = removed_column.copy_if_else(self._column, starts_column) # type: ignore[arg-type]
return self._return_or_inplace(result)

def _find(
Expand Down
18 changes: 7 additions & 11 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@

import cudf
import cudf.core.common
from cudf import _lib as libcudf
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_scalar_or_zero_d_array,
Expand Down Expand Up @@ -2502,8 +2501,7 @@ def scatter_by_map(
map_size,
)
partitioned_columns = [
libcudf.column.Column.from_pylibcudf(col)
for col in plc_table.columns()
ColumnBase.from_pylibcudf(col) for col in plc_table.columns()
]

partitioned = self._from_columns_like_self(
Expand Down Expand Up @@ -4127,7 +4125,7 @@ def transpose(self):
)
)
result_columns = [
libcudf.column.Column.from_pylibcudf(col, data_ptr_exposed=True)
ColumnBase.from_pylibcudf(col, data_ptr_exposed=True)
for col in result_table.columns()
]

Expand Down Expand Up @@ -5035,8 +5033,7 @@ def partition_by_hash(
nparts,
)
output_columns = [
libcudf.column.Column.from_pylibcudf(col)
for col in plc_table.columns()
ColumnBase.from_pylibcudf(col) for col in plc_table.columns()
]

outdf = self._from_columns_like_self(
Expand Down Expand Up @@ -7245,8 +7242,7 @@ def stack(
self.shape[0],
)
tiled_index = [
libcudf.column.Column.from_pylibcudf(plc)
for plc in plc_table.columns()
ColumnBase.from_pylibcudf(plc) for plc in plc_table.columns()
]

# Assemble the final index
Expand Down Expand Up @@ -7325,7 +7321,7 @@ def unnamed_group_generator():
)

with acquire_spill_lock():
interleaved_col = libcudf.column.Column.from_pylibcudf(
interleaved_col = ColumnBase.from_pylibcudf(
plc.reshape.interleave_columns(
plc.Table(
[
Expand Down Expand Up @@ -7839,7 +7835,7 @@ def interleave_columns(self):
"interleave_columns does not support 'category' dtype."
)
with acquire_spill_lock():
result_col = libcudf.column.Column.from_pylibcudf(
result_col = ColumnBase.from_pylibcudf(
plc.reshape.interleave_columns(
plc.Table(
[
Expand All @@ -7860,7 +7856,7 @@ def _compute_column(self, expr: str) -> ColumnBase:
),
plc.expressions.to_expression(expr, self._column_names),
)
return libcudf.column.Column.from_pylibcudf(plc_column)
return ColumnBase.from_pylibcudf(plc_column)

@_performance_tracking
def eval(self, expr: str, inplace: bool = False, **kwargs):
Expand Down
9 changes: 3 additions & 6 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import cudf
import cudf.core._internals
from cudf import _lib as libcudf
from cudf.api.extensions import no_default
from cudf.api.types import (
is_list_like,
Expand Down Expand Up @@ -1079,10 +1078,8 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
plc_tables[1],
plc.types.NullEquality.EQUAL,
)
left_order = libcudf.column.Column.from_pylibcudf(left_plc)
right_order = libcudf.column.Column.from_pylibcudf(
right_plc
)
left_order = ColumnBase.from_pylibcudf(left_plc)
right_order = ColumnBase.from_pylibcudf(right_plc)
# left order is some permutation of the ordering we
# want, and right order is a matching gather map for
# the result table. Get the correct order by sorting
Expand Down Expand Up @@ -2518,7 +2515,7 @@ def _cov_or_corr(self, func, method_name):

@acquire_spill_lock()
def interleave_columns(source_columns):
return libcudf.column.Column.from_pylibcudf(
return ColumnBase.from_pylibcudf(
plc.reshape.interleave_columns(
plc.Table(
[c.to_pylibcudf(mode="read") for c in source_columns]
Expand Down
7 changes: 3 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import pylibcudf as plc

import cudf
from cudf import _lib as libcudf
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
Expand Down Expand Up @@ -1364,8 +1363,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
plc.Table([rcol.to_pylibcudf(mode="read")]),
plc.types.NullEquality.EQUAL,
)
scatter_map = libcudf.column.Column.from_pylibcudf(left_plc)
indices = libcudf.column.Column.from_pylibcudf(right_plc)
scatter_map = ColumnBase.from_pylibcudf(left_plc)
indices = ColumnBase.from_pylibcudf(right_plc)
result = copying.scatter([indices], scatter_map, [result])[0]
result_series = cudf.Series._from_column(result)

Expand Down Expand Up @@ -3383,7 +3382,7 @@ def interval_range(
freq = freq.astype(common_dtype)

with acquire_spill_lock():
bin_edges = libcudf.column.Column.from_pylibcudf(
bin_edges = ColumnBase.from_pylibcudf(
plc.filling.sequence(
size=periods + 1,
init=start.device_value.c_value,
Expand Down
20 changes: 8 additions & 12 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import pylibcudf as plc

import cudf
import cudf._lib as libcudf
import cudf.core
import cudf.core._internals
import cudf.core.algorithms
Expand Down Expand Up @@ -2939,7 +2938,7 @@ def hash_values(
plc_column = plc.hashing.sha512(plc_table)
else:
raise ValueError(f"Unsupported hashing algorithm {method}.")
result = libcudf.column.Column.from_pylibcudf(plc_column)
result = ColumnBase.from_pylibcudf(plc_column)
return cudf.Series._from_column(
result,
index=self.index,
Expand Down Expand Up @@ -3057,7 +3056,7 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self:
[start, stop],
)
sliced = [
libcudf.column.Column.from_pylibcudf(col)
ColumnBase.from_pylibcudf(col)
for col in plc_tables[0].columns()
]
result = self._from_columns_like_self(
Expand Down Expand Up @@ -3257,10 +3256,10 @@ def duplicated(
plc.types.NullEquality.EQUAL,
plc.types.NanEquality.ALL_EQUAL,
)
distinct = libcudf.column.Column.from_pylibcudf(plc_column)
distinct = ColumnBase.from_pylibcudf(plc_column)
result = copying.scatter(
[cudf.Scalar(False)],
distinct,
distinct, # type: ignore[arg-type]
[as_column(True, length=len(self), dtype=bool)],
bounds_check=False,
)[0]
Expand All @@ -3282,8 +3281,7 @@ def _empty_like(self, keep_index: bool = True) -> Self:
)
)
columns = [
libcudf.column.Column.from_pylibcudf(col)
for col in plc_table.columns()
ColumnBase.from_pylibcudf(col) for col in plc_table.columns()
]
result = self._from_columns_like_self(
columns,
Expand Down Expand Up @@ -5391,8 +5389,7 @@ def _explode(self, explode_column: Any, ignore_index: bool):
column_index + len(idx_cols),
)
exploded = [
libcudf.column.Column.from_pylibcudf(col)
for col in plc_table.columns()
ColumnBase.from_pylibcudf(col) for col in plc_table.columns()
]
# We must copy inner datatype of the exploded list column to
# maintain struct dtype key names
Expand Down Expand Up @@ -5449,8 +5446,7 @@ def tile(self, count: int):
count,
)
tiled = [
libcudf.column.Column.from_pylibcudf(plc)
for plc in plc_table.columns()
ColumnBase.from_pylibcudf(plc) for plc in plc_table.columns()
]
return self._from_columns_like_self(
tiled,
Expand Down Expand Up @@ -6455,7 +6451,7 @@ def rank(

with acquire_spill_lock():
result_columns = [
libcudf.column.Column.from_pylibcudf(
ColumnBase.from_pylibcudf(
plc.sorting.rank(
col.to_pylibcudf(mode="read"),
method_enum,
Expand Down
24 changes: 12 additions & 12 deletions python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pylibcudf as plc

import cudf
from cudf import _lib as libcudf
from cudf.core._internals import sorting
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column import ColumnBase
Expand All @@ -25,10 +24,10 @@ class Merge:
@staticmethod
@acquire_spill_lock()
def _joiner(
lhs: list[libcudf.column.Column],
rhs: list[libcudf.column.Column],
lhs: list[ColumnBase],
rhs: list[ColumnBase],
how: str,
) -> tuple[libcudf.column.Column, libcudf.column.Column]:
) -> tuple[ColumnBase, ColumnBase]:
if how == "outer":
how = "full"
if (join_func := getattr(plc.join, f"{how}_join", None)) is None:
Expand All @@ -39,9 +38,10 @@ def _joiner(
plc.Table([col.to_pylibcudf(mode="read") for col in rhs]),
plc.types.NullEquality.EQUAL,
)
return libcudf.column.Column.from_pylibcudf(
left_rows
), libcudf.column.Column.from_pylibcudf(right_rows)
return (
ColumnBase.from_pylibcudf(left_rows),
ColumnBase.from_pylibcudf(right_rows),
)

def __init__(
self,
Expand Down Expand Up @@ -574,19 +574,19 @@ def _validate_merge_params(
class MergeSemi(Merge):
@staticmethod
@acquire_spill_lock()
def _joiner(
lhs: list[libcudf.column.Column],
rhs: list[libcudf.column.Column],
def _joiner( # type: ignore[override]
lhs: list[ColumnBase],
rhs: list[ColumnBase],
how: str,
) -> tuple[libcudf.column.Column, None]:
) -> tuple[ColumnBase, None]:
if (
join_func := getattr(
plc.join, f"{how.replace('left', 'left_')}_join", None
)
) is None:
raise ValueError(f"Invalid join type {how}")

return libcudf.column.Column.from_pylibcudf(
return ColumnBase.from_pylibcudf(
join_func(
plc.Table([col.to_pylibcudf(mode="read") for col in lhs]),
plc.Table([col.to_pylibcudf(mode="read") for col in rhs]),
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
import pylibcudf as plc

import cudf
import cudf._lib as libcudf
from cudf.api.extensions import no_default
from cudf.api.types import is_integer, is_list_like, is_object_dtype, is_scalar
from cudf.core import column
from cudf.core._base_index import _return_get_indexer_result
from cudf.core._internals import copying, sorting
from cudf.core.algorithms import factorize
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column.column import ColumnBase
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.frame import Frame
from cudf.core.index import (
Expand Down Expand Up @@ -1962,8 +1962,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
plc_tables[1],
plc.types.NullEquality.EQUAL,
)
scatter_map = libcudf.column.Column.from_pylibcudf(left_plc)
indices = libcudf.column.Column.from_pylibcudf(right_plc)
scatter_map = ColumnBase.from_pylibcudf(left_plc)
indices = ColumnBase.from_pylibcudf(right_plc)
result = copying.scatter([indices], scatter_map, [result])[0]
result_series = cudf.Series._from_column(result)

Expand Down
Loading