Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/coffea/nanoevents/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def from_root(
decompression_executor=None,
interpretation_executor=None,
delayed=uproot._util.unset,
preload=None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably move this further up the list. Maybe after entry start and entry stop? We want this to be visible no?

Copy link
Collaborator Author

@pfackeldey pfackeldey Aug 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't mean to mess with the order in case people pass positional args here - that would be a breaking change. Maybe coffea may want to force all of those options to be passed as kw-only at some point?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ohh yeah you're right about that. I guess I've never seen a person not pass kwargs that's why I said this. Indeed forcing kwargs would be best here from the coffea side.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lgray what do you think? Should we enforce kwargs in a separate PR? Should we just break the order here and not care? I've actually never seen anybody use positional args here because the first argument treepath is something that's never used because no one is actually passing in an uproot.reading.ReadOnlyDirectory to from_root.
`

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah - sorry for missing this. I would take it to a separate PR. I hope no one is actually using those and positional arguments though.

):
"""Quickly build NanoEvents from a root file

Expand Down Expand Up @@ -310,6 +311,9 @@ def from_root(
see: https://github.com/scikit-hep/uproot5/blob/main/src/uproot/_dask.py#L109
interpretation_executor (None or Executor with a ``submit`` method):
see: https://github.com/scikit-hep/uproot5/blob/main/src/uproot/_dask.py#L113
preload (None or Callable):
A function to call to preload specific branches/columns in bulk. Only works in eager and virtual mode.
Passed to ``tree.arrays`` as the ``filter_branch`` argument to filter branches to be preloaded.
Comment on lines +314 to +316
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above, I'd not put this in the end.


Returns
-------
Expand Down Expand Up @@ -415,6 +419,24 @@ def from_root(
f"{entry_start}-{entry_stop}",
)
uuidpfn = {partition_key[0]: tree.file.file_path}

preloaded_arrays = None
if preload is not None:
preloaded_arrays = tree.arrays(
filter_branch=preload,
entry_start=entry_start,
entry_stop=entry_stop,
ak_add_doc=True,
decompression_executor=decompression_executor,
interpretation_executor=interpretation_executor,
how=dict,
)
# this ensures that the preloaded arrays are only sliced as they are supposed to be
preloaded_arrays = {
k: _OnlySliceableAs(v, slice(entry_start, entry_stop))
for k, v in preloaded_arrays.items()
}

mapping = UprootSourceMapping(
TrivialUprootOpener(uuidpfn, uproot_options),
entry_start,
Expand All @@ -423,6 +445,7 @@ def from_root(
access_log=access_log,
use_ak_forth=use_ak_forth,
virtual=mode == "virtual",
preloaded_arrays=preloaded_arrays,
)
mapping.preload_column_source(partition_key[0], partition_key[1], tree)

Expand Down
28 changes: 18 additions & 10 deletions src/coffea/nanoevents/mapping/uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def __init__(
virtual=False,
decompression_executor=None,
interpretation_executor=None,
preloaded_arrays=None,
):
super().__init__(
fileopener=fileopener,
Expand All @@ -138,6 +139,7 @@ def __init__(
self.interpretation_executor = (
interpretation_executor or uproot.source.futures.TrivialExecutor()
)
self.preloaded_arrays = preloaded_arrays

@classmethod
def _extract_base_form(cls, tree, iteritems_options={}):
Expand Down Expand Up @@ -211,16 +213,22 @@ def extract_column(
"Received columnhandle of None when missing column in file is not allowed!"
)

interp = columnhandle.interpretation
interp._forth = use_ak_forth

the_array = columnhandle.array(
interp,
entry_start=start,
entry_stop=stop,
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
if (
self.preloaded_arrays is not None
and columnhandle.name in self.preloaded_arrays
):
the_array = self.preloaded_arrays[columnhandle.name][start:stop]
else:
interp = columnhandle.interpretation
interp._forth = use_ak_forth

the_array = columnhandle.array(
interp,
entry_start=start,
entry_stop=stop,
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
if isinstance(the_array.layout, awkward.contents.ListOffsetArray):
the_array = awkward.Array(the_array.layout.to_ListOffsetArray64(True))

Expand Down
Loading