Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New array conversion methods #9236

Merged
merged 24 commits into from
Oct 1, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2922dd1
Add DataFrame.values_host and remove uses of DataFrame.as_matrix from…
vyasr Sep 10, 2021
853085e
Initial implementation of to_numpy and to_cupy.
vyasr Sep 10, 2021
baa873f
Add warning and fix comments.
vyasr Sep 14, 2021
05db299
Move all standard functions to frame.
vyasr Sep 14, 2021
59f944d
Add warning to Series.to_array.
vyasr Sep 14, 2021
f7054d7
Remove as_gpu_matrix from tests and fix some uncovered bugs.
vyasr Sep 14, 2021
77ae420
Centralize to_numpy and to_cupy logic.
vyasr Sep 14, 2021
da49be6
Delete docstrings for deprecated methods.
vyasr Sep 14, 2021
f70ab9e
Revert to calling the column methods directly for SingleColumnFrame.
vyasr Sep 14, 2021
3e6d6da
Replace to_array with to_numpy wherever easy in tests.
vyasr Sep 14, 2021
e1de7f9
Add support for null replacement and fix a few bugs.
vyasr Sep 15, 2021
4d1ac2a
Remove usage of default_na_value everywhere possible.
vyasr Sep 17, 2021
944704b
Rename default_na_value to _default_na_value.
vyasr Sep 17, 2021
047c2ca
Add comment for additional methods to be removed.
vyasr Sep 17, 2021
6fc2bbf
Add back accidentally removed default na and remove all possible refs …
vyasr Sep 17, 2021
b228483
Replace numerous uses of to_array with to_numpy.
vyasr Sep 17, 2021
46423f5
Address PR comments.
vyasr Sep 17, 2021
e808c90
Add more deprecation comments and fix issue in testing function.
vyasr Sep 21, 2021
603a8b6
Change exception thrown by StringColumn.values.
vyasr Sep 21, 2021
24724d9
Replace more instances of to_array in tests.
vyasr Sep 21, 2021
8ed6d00
Remove all remaining references to to_array in code.
vyasr Sep 21, 2021
bf898b2
Add proper implementation of find_common_type for categoricals.
vyasr Sep 21, 2021
22b961e
Convert columns to index before setting in _init_from_series_list.
vyasr Sep 22, 2021
8a62620
Address review comments.
vyasr Sep 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Remove as_gpu_matrix from tests and fix some uncovered bugs.
  • Loading branch information
vyasr committed Sep 21, 2021
commit f7054d7ad6f36a632a0d9ff7507cb8d30100f72b
8 changes: 7 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ def values_host(self) -> "np.ndarray":
"""
Return a numpy representation of the Column.
"""
if len(self) == 0:
return np.array([], dtype=self.dtype)

if self.has_nulls:
raise ValueError("Column must have no nulls.")

return self.data_array_view.copy_to_host()

@property
Expand All @@ -138,7 +144,7 @@ def values(self) -> "cupy.ndarray":
Return a CuPy representation of the Column.
"""
if len(self) == 0:
return cupy.asarray([], dtype=self.dtype)
return cupy.array([], dtype=self.dtype)

if self.has_nulls:
raise ValueError("Column must have no nulls.")
Expand Down
27 changes: 14 additions & 13 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,12 +941,13 @@ def test_dataframe_dir_and_getattr():
df.not_a_column


@pytest.mark.parametrize("order", ["C", "F"])
def test_empty_dataframe_as_gpu_matrix(order):
def test_empty_dataframe_to_array():
df = cudf.DataFrame()

# Check fully empty dataframe.
mat = df.as_gpu_matrix(order=order).copy_to_host()
mat = df.to_cupy()
assert mat.shape == (0, 0)
mat = df.to_numpy()
assert mat.shape == (0, 0)

df = cudf.DataFrame()
Expand All @@ -955,33 +956,32 @@ def test_empty_dataframe_as_gpu_matrix(order):
df[k] = np.random.random(nelem)

# Check all columns in empty dataframe.
mat = df.head(0).as_gpu_matrix(order=order).copy_to_host()
mat = df.head(0).to_cupy()
assert mat.shape == (0, 3)


@pytest.mark.parametrize("order", ["C", "F"])
def test_dataframe_as_gpu_matrix(order):
def test_dataframe_to_cupy():
df = cudf.DataFrame()

nelem = 123
for k in "abcd":
df[k] = np.random.random(nelem)

# Check all columns
mat = df.as_gpu_matrix(order=order).copy_to_host()
mat = df.to_numpy()
assert mat.shape == (nelem, 4)
for i, k in enumerate(df.columns):
np.testing.assert_array_equal(df[k].to_array(), mat[:, i])

# Check column subset
mat = df.as_gpu_matrix(order=order, columns=["a", "c"]).copy_to_host()
mat = df[["a", "c"]].to_cupy().get()
assert mat.shape == (nelem, 2)

for i, k in enumerate("ac"):
np.testing.assert_array_equal(df[k].to_array(), mat[:, i])


def test_dataframe_as_gpu_matrix_null_values():
def test_dataframe_to_cupy_null_values():
df = cudf.DataFrame()

nelem = 123
Expand All @@ -999,14 +999,15 @@ def test_dataframe_as_gpu_matrix_null_values():
refvalues[k] = data

# Check null value causes error
with pytest.raises(ValueError) as raises:
df.as_gpu_matrix()
raises.match("column 'a' has null values")
with pytest.raises(ValueError):
df.to_cupy()
with pytest.raises(ValueError):
df.to_numpy()

for k in df.columns:
df[k] = df[k].fillna(na)

mat = df.as_gpu_matrix().copy_to_host()
mat = df.to_numpy()
for i, k in enumerate(df.columns):
np.testing.assert_array_equal(refvalues[k], mat[:, i])

Expand Down