Skip to content

refactor: RNTuple writing improvements #1431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions src/uproot/behaviors/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def iterate(
step_size="100 MB",
decompression_executor=None, # TODO: Not implemented yet
library="ak", # TODO: Not implemented yet
ak_add_doc=False, # TODO: Not implemented yet
ak_add_doc=False,
how=None,
report=False, # TODO: Not implemented yet
allow_missing=False, # TODO: Not implemented yet
Expand Down Expand Up @@ -207,7 +207,7 @@ def concatenate(
entry_stop=None,
decompression_executor=None, # TODO: Not implemented yet
library="ak", # TODO: Not implemented yet
ak_add_doc=False, # TODO: Not implemented yet
ak_add_doc=False,
how=None,
allow_missing=False,
# For compatibility reasons we also accept kwargs meant for TTrees
Expand Down Expand Up @@ -488,6 +488,7 @@ def to_akform(
filter_name=no_filter,
filter_typename=no_filter,
filter_field=no_filter,
ak_add_doc=False,
# For compatibility reasons we also accept kwargs meant for TTrees
filter_branch=unset,
):
Expand All @@ -501,6 +502,10 @@ def to_akform(
filter to select ``RFields`` using the full
:doc:`uproot.models.RNTuple.RField` object. The ``RField`` is
included if the function returns True, excluded if it returns False.
ak_add_doc (bool | dict): If True and ``library="ak"``, add the ``description``
to the Awkward ``__doc__`` parameter of the array.
If a dict ``{key: value}`` and ``library="ak"``, add the attribute named ``value`` to the
Awkward ``key`` parameter of the array.
filter_branch (None or function of :doc:`uproot.models.RNTuple.RField` \u2192 bool): An alias for ``filter_field`` included
for compatibility with software that was used for :doc:`uproot.behaviors.TBranch.TBranch`. This argument should not be used
and will be removed in a future version.
Expand All @@ -524,17 +529,31 @@ def to_akform(
# the field needs to be in the keys or be a parent of a field in the keys
if any(key.startswith(field.name) for key in keys):
top_names.append(field.name)
record_list.append(rntuple.field_form(field.field_id, keys))
record_list.append(
rntuple.field_form(field.field_id, keys, ak_add_doc=ak_add_doc)
)
else:
# Always use the full path for keys
# Also include the field itself
keys = [self.path] + [f"{self.path}.{k}" for k in keys]
# The field needs to be in the keys or be a parent of a field in the keys
if any(key.startswith(self.path) for key in keys):
top_names.append(self.name)
record_list.append(rntuple.field_form(self.field_id, keys))
record_list.append(
rntuple.field_form(self.field_id, keys, ak_add_doc=ak_add_doc)
)

form = ak.forms.RecordForm(record_list, top_names, form_key="toplevel")
parameters = None
if isinstance(ak_add_doc, bool) and ak_add_doc and self.description != "":
parameters = {"__doc__": self.description}
elif isinstance(ak_add_doc, dict) and self is not rntuple:
parameters = {
key: self.__getattribute__(value) for key, value in ak_add_doc.items()
}

form = ak.forms.RecordForm(
record_list, top_names, form_key="toplevel", parameters=parameters
)
return form

def arrays(
Expand Down Expand Up @@ -658,6 +677,7 @@ def arrays(
filter_typename=filter_typename,
filter_field=filter_field,
filter_branch=filter_branch,
ak_add_doc=ak_add_doc,
)

# only read columns mentioned in the awkward form
Expand Down Expand Up @@ -750,7 +770,7 @@ def iterate(
step_size="100 MB",
decompression_executor=None, # TODO: Not implemented yet
library="ak", # TODO: Not implemented yet
ak_add_doc=False, # TODO: Not implemented yet
ak_add_doc=False,
how=None,
report=False, # TODO: Not implemented yet
# For compatibility reasons we also accept kwargs meant for TTrees
Expand Down Expand Up @@ -852,6 +872,7 @@ def iterate(
filter_typename=filter_typename,
filter_field=filter_field,
filter_branch=filter_branch,
ak_add_doc=ak_add_doc,
)

step_size = _regularize_step_size(
Expand Down Expand Up @@ -1567,6 +1588,13 @@ def name(self):
"""
return self.header.ntuple_name

@property
def description(self):
"""
Description of the ``RNTuple``, read from the ``ntuple_description``
attribute of its header.
"""
return self.header.ntuple_description

@property
def object_path(self):
"""
Expand Down
23 changes: 13 additions & 10 deletions src/uproot/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""
from __future__ import annotations

from enum import IntEnum, IntFlag

import numpy

Expand Down Expand Up @@ -184,8 +184,8 @@
0x19: 64,
0x1A: 32,
0x1B: 64,
0x1C: 31, # variable from 10 to 31
0x1D: 32, # variable from 1 to 32
0x1C: 32, # from 10 to 31 in storage, but 32 in memory
0x1D: 32, # from 1 to 32 in storage, but 32 in memory
}
rntuple_col_type_to_num_dict = {
"bit": 0x00,
Expand Down Expand Up @@ -253,40 +253,43 @@
)


class RNTupleLocatorType(IntEnum):
    """Identifier for the kind of an RNTuple locator.

    The values are mutually exclusive identifiers rather than combinable
    bits, so this is an ``IntEnum``: every member (including the zero-valued
    one) is a canonical member, and constructing the enum from an unknown
    integer raises ``ValueError``.
    """

    STANDARD = 0x00
    LARGE = 0x01


class RNTupleEnvelopeType(IntEnum):
    """Identifier for the type of an RNTuple envelope.

    The values are sequential, mutually exclusive identifiers, not bit
    masks. This must stay an ``IntEnum``: as an ``IntFlag``, ``PAGELIST``
    (0x03) would be interpreted as the combination ``HEADER | FOOTER``
    instead of a distinct type, and unknown values would no longer be
    rejected on construction.
    """

    RESERVED = 0x00
    HEADER = 0x01
    FOOTER = 0x02
    PAGELIST = 0x03


class RNTupleFieldRole(IntEnum):
    """Identifier for the structural role of an RNTuple field.

    The values are sequential, mutually exclusive identifiers, not bit
    masks. This must stay an ``IntEnum``: as an ``IntFlag``, ``VARIANT``
    (0x03) would be interpreted as ``COLLECTION | RECORD`` instead of a
    distinct role, and unknown values would no longer be rejected on
    construction.
    """

    LEAF = 0x00
    COLLECTION = 0x01
    RECORD = 0x02
    VARIANT = 0x03
    STREAMER = 0x04


class RNTupleFieldFlags(IntFlag):
    """Bit flags for an RNTuple field.

    Each non-zero member occupies its own bit, so members can be combined
    with ``|`` and tested with ``in``; ``NOFLAG`` is the empty set.
    """

    NOFLAG = 0x00
    REPETITIVE = 0x01
    PROJECTED = 0x02
    CHECKSUM = 0x04


# Backward-compatible alias: the class used to be named in the singular.
RNTupleFieldFlag = RNTupleFieldFlags


class RNTupleColumnFlags(IntFlag):
    """Bit flags for an RNTuple column.

    Each non-zero member occupies its own bit, so members can be combined
    with ``|`` and tested with ``in``; ``NOFLAG`` is the empty set.
    """

    NOFLAG = 0x00
    DEFERRED = 0x01
    RANGE = 0x02


# Backward-compatible alias: the class used to be named in the singular.
RNTupleColumnFlag = RNTupleColumnFlags


class RNTupleExtraTypeIdentifier(IntEnum):
    """Identifier for the content type of RNTuple extra type information.

    This is a plain identifier, not a bit mask, so it is an ``IntEnum``:
    the zero-valued ``ROOT`` member is a regular, iterable member and
    unknown values are rejected on construction.
    """

    ROOT = 0x00


class RNTupleClusterFlags(IntFlag):
    """Bit flags for an RNTuple cluster.

    Each non-zero member occupies its own bit, so members can be combined
    with ``|`` and tested with ``in``; ``NOFLAG`` is the empty set.
    """

    NOFLAG = 0x00
    SHARDED = 0x01


# Backward-compatible alias: the class used to be named in the singular.
RNTupleClusterFlag = RNTupleClusterFlags
Loading