-
Notifications
You must be signed in to change notification settings - Fork 908
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Migrate ORC Writer to pylibcudf #17310
base: branch-24.12
Are you sure you want to change the base?
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Small suggestions for cleanup, looks good to me.
str.encode( | ||
_index_level_name(idx_name, level, table._column_names) | ||
) | ||
tbl_meta.c_obj.column_metadata[level].set_name( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the reviewer, c_obj
is not usually accessed in the cudf._lib
modules. I did it here because I didn't want to have to maintain a column_data
attribute in TableInputMetadata
. You'd have to "maintain" it because self.c_obj.column_metadata
and self.column_metadata
are different. Another approach is to have ColumnInMetadata
's c_obj
be a pointer. WDYT?
Here's what it looks like if ColumnInMetadata's
c_obj` is a pointer.
diff --git a/python/pylibcudf/pylibcudf/io/types.pxd b/python/pylibcudf/pylibcudf/io/types.pxd
index 6e42c9c309..f8d1c515db 100644
--- a/python/pylibcudf/pylibcudf/io/types.pxd
+++ b/python/pylibcudf/pylibcudf/io/types.pxd
@@ -48,7 +48,7 @@ cdef class SinkInfo:
cdef sink_info c_obj
cdef class ColumnInMetadata:
- cdef column_in_metadata c_obj
+ cdef column_in_metadata* c_obj
cpdef ColumnInMetadata set_name(self, str name)
@@ -75,7 +75,7 @@ cdef class ColumnInMetadata:
cpdef str get_name(self)
@staticmethod
- cdef ColumnInMetadata from_libcudf(column_in_metadata data)
+ cdef ColumnInMetadata from_libcudf(column_in_metadata* data)
cdef class TableInputMetadata:
cdef public Table table
and
diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index c72fe3d4aa..f31f5c4d30 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -33,6 +33,7 @@ from pylibcudf.libcudf.io.types import (
quote_style as QuoteStyle, # no-cython-lint
statistics_freq as StatisticsFreq, # no-cython-lint
)
+from cython.operator cimport dereference
__all__ = [
"ColumnEncoding",
@@ -345,10 +346,10 @@ cdef class ColumnInMetadata:
)
@staticmethod
- cdef ColumnInMetadata from_libcudf(column_in_metadata data):
+ cdef ColumnInMetadata from_libcudf(column_in_metadata* data):
"""Create a Python ColumnInMetadata from a libcudf column_in_metadata."""
cdef ColumnInMetadata out = ColumnInMetadata.__new__(ColumnInMetadata)
- out.c_obj = move(data)
+ out.c_obj = data
return out
cpdef ColumnInMetadata set_name(self, str name):
@@ -362,7 +363,7 @@ cdef class ColumnInMetadata:
-------
Self
"""
- self.c_obj.set_name(name.encode())
+ dereference(self.c_obj).set_name(name.encode())
return self
...
cpdef ColumnInMetadata set_decimal_precision(self, int precision):
@@ -417,7 +418,7 @@ cdef class ColumnInMetadata:
-------
Self
"""
- self.c_obj.set_decimal_precision(precision)
+ dereference(self.c_obj).set_decimal_precision(precision)
return self
cpdef ColumnInMetadata child(self, int i):
@@ -431,9 +432,8 @@ cdef class ColumnInMetadata:
-------
ColumnInMetadata
"""
- return ColumnInMetadata.from_libcudf(
- move(self.c_obj.child(i))
- )
+ cdef column_in_metadata* child_c_obj = &dereference(self.c_obj).child(i)
+ return ColumnInMetadata.from_libcudf(child_c_obj)
...
cdef class TableInputMetadata:
@@ -514,3 +514,8 @@ cdef class TableInputMetadata:
"""
def __init__(self, Table table):
self.c_obj = table_input_metadata(table.view())
+ self.column_metadata = []
+
+ cdef int num_columns = self.c_obj.column_metadata.size()
+ for i in range(num_columns):
+ self.column_metadata.append(ColumnInMetadata.from_libcudf(&self.c_obj.column_metadata[i]))
Description
Apart of #15162.
Checklist