Closed
Description
Hi. I've come across an issue where, if a ROOT file has a colon in its name, `uproot3` can open the file but `uproot4` fails. I can't give you the file, but I can show you a minimal failing example and then a reproducible example with public files.
Minimal Failing Example
$ tree .
.
├── data-tree
│ └── data16_13TeV:data16_13TeV.periodA.physics_Main.PhysCont.DAOD_JETM1.grp16_v01_p4061.root
├── issue.py
├── requirements.txt
1 directory, 3 files
$ cat requirements.txt
uproot
uproot4
$ docker run --rm -it -v $PWD:/data -w /data python:3.8 /bin/bash
root@510598a7f4e8:/data# python -m pip install --upgrade pip setuptools wheel
root@510598a7f4e8:/data# python -m pip install -r requirements.txt
root@510598a7f4e8:/data# python --version
Python 3.8.5
root@510598a7f4e8:/data# python -m pip list
Package Version
-------------- -------
awkward 0.13.0
cachetools 4.1.1
numpy 1.19.1
pip 20.2.2
setuptools 49.6.0
uproot 3.12.0
uproot-methods 0.7.4
uproot4 0.0.18
wheel 0.35.1
root@510598a7f4e8:/data# cp data-tree/data16_13TeV\:data16_13TeV.periodA.physics_Main.PhysCont.DAOD_JETM1.grp16_v01_p4061.root data-tree/renamed.root
root@510598a7f4e8:/data# python
Python 3.8.5 (default, Aug 5 2020, 08:22:02)
[GCC 8.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import uproot as uproot3
>>> import uproot4
>>> from pathlib import Path
>>>
>>> uproot3_file = uproot3.open(
... "data-tree/data16_13TeV:data16_13TeV.periodA.physics_Main.PhysCont.DAOD_JETM1.grp16_v01_p4061.root"
... )
>>> print(f"uproot3 opens file as {uproot3_file}")
uproot3 opens file as <ROOTDirectory b'/home/feickert/workarea/submitDir/data-tree//data16_13TeV:data16_13TeV.periodA.physics_Main.PhysCont.DAOD_JETM1.grp16_v01_p4061.root' at 0x7fce6da5b5e0>
>>>
>>> # uproot4 fails with the ':' in the filename
>>> uproot4.open(
... "data-tree/data16_13TeV:data16_13TeV.periodA.physics_Main.PhysCont.DAOD_JETM1.grp16_v01_p4061.root"
... )
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/uproot4/source/file.py", line 74, in __init__
self._file = numpy.memmap(self._file_path, dtype=self._dtype, mode="r")
File "/usr/local/lib/python3.8/site-packages/numpy/core/memmap.py", line 225, in __new__
f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')
FileNotFoundError: [Errno 2] No such file or directory: 'data-tree/data16_13TeV'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.8/site-packages/uproot4/reading.py", line 78, in open
file = ReadOnlyFile(
File "/usr/local/lib/python3.8/site-packages/uproot4/reading.py", line 265, in __init__
self._source = Source(file_path, **self._options)
File "/usr/local/lib/python3.8/site-packages/uproot4/source/file.py", line 80, in __init__
self._fallback = uproot4.source.file.FileSource(file_path, opts)
AttributeError: module 'uproot4.source.file' has no attribute 'FileSource'
>>> # even if that is inside a pathlib object
>>> uproot4.open(
... Path(
... "data-tree/data16_13TeV:data16_13TeV.periodA.physics_Main.PhysCont.DAOD_JETM1.grp16_v01_p4061.root"
... )
... )
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/uproot4/source/file.py", line 74, in __init__
self._file = numpy.memmap(self._file_path, dtype=self._dtype, mode="r")
File "/usr/local/lib/python3.8/site-packages/numpy/core/memmap.py", line 225, in __new__
f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')
FileNotFoundError: [Errno 2] No such file or directory: 'data-tree/data16_13TeV'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.8/site-packages/uproot4/reading.py", line 78, in open
file = ReadOnlyFile(
File "/usr/local/lib/python3.8/site-packages/uproot4/reading.py", line 265, in __init__
self._source = Source(file_path, **self._options)
File "/usr/local/lib/python3.8/site-packages/uproot4/source/file.py", line 80, in __init__
self._fallback = uproot4.source.file.FileSource(file_path, opts)
AttributeError: module 'uproot4.source.file' has no attribute 'FileSource'
>>> # but the file itself is fine
>>> uproot4.open("data-tree/renamed.root")
<ReadOnlyDirectory '/' at 0x7fce725e7670>
Failing Reproducible Example
# issue.py
import uproot as uproot3
import uproot4
from pathlib import Path
def main():
# curl -sL https://github.com/scikit-hep/scikit-hep-testdata/raw/master/src/skhep_testdata/data/uproot-HZZ-lz4.root -o uproot-HZZ-lz4.root
uproot3.open("uproot-HZZ-lz4.root")
uproot3.open("uproot:HZZ-lz4.root")
uproot3.open(Path("uproot:HZZ-lz4.root"))
uproot4.open("uproot-HZZ-lz4.root")
uproot4.open("uproot:HZZ-lz4.root")
if __name__ == "__main__":
main()
root@510598a7f4e8:/data# curl -sL https://github.com/scikit-hep/scikit-hep-testdata/raw/master/src/skhep_testdata/data/uproot-HZZ-lz4.root -o uproot-HZZ-lz4.root
root@510598a7f4e8:/data# cp uproot-HZZ-lz4.root uproot:HZZ-lz4.root
root@510598a7f4e8:/data# python issue.py
Traceback (most recent call last):
File "issue.py", line 17, in <module>
main()
File "issue.py", line 9, in main
uproot3.open("uproot:HZZ-lz4.root")
File "/usr/local/lib/python3.8/site-packages/uproot/rootio.py", line 63, in open
raise ValueError("URI scheme not recognized: {0}".format(path))
ValueError: URI scheme not recognized: uproot:HZZ-lz4.root
Comments
I realize that this is probably because of how `uproot4`'s `open` interprets its `path` argument (in the tracebacks above, only the part of the filename before the colon is looked up on disk), and it isn't a good idea to have a file with a colon in its name in general. However, there are ATLAS files that do, and it would be nice if `uproot3`'s behavior toward filenames could still be supported. If support for this is firmly out of scope, it would be great if there could be some huge warning about this in the docs.