Skip to content

Commit f6d0f48

Browse files
ariostas authored and pfackeldey committed
chore: update reading support for RNTuple v1 (scikit-hep#1338)
* Started updating to RNTuple v1
* Don't use hardcoded sizes
* Some more updates and fixes
* Re-enabled most tests
* Renamed test for consistency
* Read extra info from page locations
* Fixed bug for clusters without deferred columns
* Enable remaining RNTuple tests
* Fix test file names
1 parent ff5e348 commit f6d0f48

12 files changed

+335
-359
lines changed

src/uproot/const.py

Lines changed: 125 additions & 104 deletions
Original file line number | Diff line number | Diff line change
@@ -117,108 +117,136 @@
117117

118118
kStreamedMemberWise = numpy.uint16(1 << 14)
119119

120-
############ RNTuple https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md
120+
############ RNTuple https://github.com/root-project/root/blob/0b9cdbcfd326ba50ee6c2f202675656129eafbe7/tree/ntuple/v7/doc/BinaryFormatSpecification.md
121121
rntuple_col_num_to_dtype_dict = {
122-
1: "uint64",
123-
2: "uint32",
124-
3: "switch", # Switch
125-
4: "uint8",
126-
5: "uint8", # char
127-
6: "bit",
128-
7: "float64",
129-
8: "float32",
130-
9: "float16",
131-
10: "uint64",
132-
11: "uint32",
133-
12: "uint16",
134-
13: "uint8",
135-
14: "uint64", # SplitIndex64 delta encoding
136-
15: "uint32", # SplitIndex32 delta encoding
137-
16: "float64", # split
138-
17: "float32", # split
139-
18: "float16", # split
140-
19: "uint64", # split
141-
20: "uint32", # split
142-
21: "uint16", # split
143-
22: "int64",
144-
23: "int32",
145-
24: "int16",
146-
25: "int8",
147-
26: "int64", # split + zigzag encoding
148-
27: "int32", # split + zigzag encoding
149-
28: "int16", # split + zigzag encoding
150-
29: "float32trunc",
151-
30: "float32quant",
122+
0x00: "bit",
123+
0x01: "uint8", # uninterpreted byte
124+
0x02: "uint8", # char
125+
0x03: "int8",
126+
0x04: "uint8",
127+
0x05: "int16",
128+
0x06: "uint16",
129+
0x07: "int32",
130+
0x08: "uint32",
131+
0x09: "int64",
132+
0x0A: "uint64",
133+
0x0B: "float16",
134+
0x0C: "float32",
135+
0x0D: "float64",
136+
0x0E: "uint32", # Index32
137+
0x0F: "uint64", # Index64
138+
0x10: "switch", # Switch: (uint64, uint32)
139+
0x11: "int16", # SplitInt16: split + zigzag encoding
140+
0x12: "uint16", # SplitUInt16: split encoding
141+
0x13: "int32", # SplitInt32: split + zigzag encoding
142+
0x14: "uint32", # SplitUInt32: split encoding
143+
0x15: "int64", # SplitInt64: split + zigzag encoding
144+
0x16: "uint64", # SplitUInt64: split encoding
145+
0x17: "float16", # SplitReal16: split encoding
146+
0x18: "float32", # SplitReal32: split encoding
147+
0x19: "float64", # SplitReal64: split encoding
148+
0x1A: "uint32", # SplitIndex32: split + delta encoding
149+
0x1B: "uint64", # SplitIndex64: split + delta encoding
150+
0x1C: "real32trunc", # Real32Trunc: float32 with truncated mantissa
151+
0x1D: "real32quant", # Real32Quant: float32 with quantized integer representation
152152
}
153153
rntuple_col_num_to_size_dict = {
154-
1: 64,
155-
2: 32,
156-
3: 96, # Switch
157-
4: 8,
158-
5: 8, # char
159-
6: 1,
160-
7: 64,
161-
8: 32,
162-
9: 16,
163-
10: 64,
164-
11: 32,
165-
12: 16,
166-
13: 8,
167-
14: 64, # SplitIndex64 delta encoding
168-
15: 32, # SplitIndex32 delta encoding
169-
16: 64, # split
170-
17: 32, # split
171-
18: 16, # split
172-
19: 64, # split
173-
20: 32, # split
174-
21: 16, # split
175-
22: 64,
176-
23: 32,
177-
24: 16,
178-
25: 8,
179-
26: 64, # split + zigzag encoding
180-
27: 32, # split + zigzag encoding
181-
28: 16, # split + zigzag encoding
182-
29: 32, # TODO: variable size
183-
30: 32, # TODO: variable size
154+
0x00: 1,
155+
0x01: 8,
156+
0x02: 8,
157+
0x03: 8,
158+
0x04: 8,
159+
0x05: 16,
160+
0x06: 16,
161+
0x07: 32,
162+
0x08: 32,
163+
0x09: 64,
164+
0x0A: 64,
165+
0x0B: 16,
166+
0x0C: 32,
167+
0x0D: 64,
168+
0x0E: 32,
169+
0x0F: 64,
170+
0x10: 96,
171+
0x11: 16,
172+
0x12: 16,
173+
0x13: 32,
174+
0x14: 32,
175+
0x15: 64,
176+
0x16: 64,
177+
0x17: 16,
178+
0x18: 32,
179+
0x19: 64,
180+
0x1A: 32,
181+
0x1B: 64,
182+
0x1C: 31, # variable from 10 to 31
183+
0x1D: 32, # variable from 1 to 32
184184
}
185-
186185
rntuple_col_type_to_num_dict = {
187-
"index64": 1,
188-
"index32": 2,
189-
"switch": 3,
190-
"byte": 4,
191-
"char": 5,
192-
"bit": 6,
193-
"real64": 7,
194-
"real32": 8,
195-
"real16": 9,
196-
"uint64": 10,
197-
"uint32": 11,
198-
"uint16": 12,
199-
"uint8": 13,
200-
"splitindex64": 14,
201-
"splitindex32": 15,
202-
"splitreal64": 16,
203-
"splitreal32": 17,
204-
"splitreal16": 18,
205-
"splitin64": 19,
206-
"splitint32": 20,
207-
"splitint16": 21,
208-
"int64": 22,
209-
"int32": 23,
210-
"int16": 24,
211-
"int8": 25,
212-
"splitzigzagint64": 26,
213-
"splitzigzagint32": 27,
214-
"splitzigzagint16": 28,
186+
"bit": 0x00,
187+
"byte": 0x01,
188+
"char": 0x02,
189+
"int8": 0x03,
190+
"uint8": 0x04,
191+
"int16": 0x05,
192+
"uint16": 0x06,
193+
"int32": 0x07,
194+
"uint32": 0x08,
195+
"int64": 0x09,
196+
"uint64": 0x0A,
197+
"real16": 0x0B,
198+
"real32": 0x0C,
199+
"real64": 0x0D,
200+
"index32": 0x0E,
201+
"index64": 0x0F,
202+
"switch": 0x10,
203+
"splitint16": 0x11,
204+
"splituint16": 0x12,
205+
"splitint32": 0x13,
206+
"splituint32": 0x14,
207+
"splitint64": 0x15,
208+
"splituint64": 0x16,
209+
"splitreal16": 0x17,
210+
"splitreal32": 0x18,
211+
"splitreal64": 0x19,
212+
"splitindex32": 0x1A,
213+
"splitindex64": 0x1B,
214+
"real32trunc": 0x1C,
215+
"real32quant": 0x1D,
215216
}
217+
rntuple_index_types = (
218+
rntuple_col_type_to_num_dict["index32"],
219+
rntuple_col_type_to_num_dict["index64"],
220+
rntuple_col_type_to_num_dict["splitindex32"],
221+
rntuple_col_type_to_num_dict["splitindex64"],
222+
)
223+
rntuple_split_types = (
224+
rntuple_col_type_to_num_dict["splitint16"],
225+
rntuple_col_type_to_num_dict["splituint16"],
226+
rntuple_col_type_to_num_dict["splitint32"],
227+
rntuple_col_type_to_num_dict["splituint32"],
228+
rntuple_col_type_to_num_dict["splitint64"],
229+
rntuple_col_type_to_num_dict["splituint64"],
230+
rntuple_col_type_to_num_dict["splitreal16"],
231+
rntuple_col_type_to_num_dict["splitreal32"],
232+
rntuple_col_type_to_num_dict["splitreal64"],
233+
rntuple_col_type_to_num_dict["splitindex32"],
234+
rntuple_col_type_to_num_dict["splitindex64"],
235+
)
236+
rntuple_zigzag_types = (
237+
rntuple_col_type_to_num_dict["splitint16"],
238+
rntuple_col_type_to_num_dict["splitint32"],
239+
rntuple_col_type_to_num_dict["splitint64"],
240+
)
241+
rntuple_delta_types = (
242+
rntuple_col_type_to_num_dict["splitindex32"],
243+
rntuple_col_type_to_num_dict["splitindex64"],
244+
)
216245

217246

218247
class RNTupleLocatorType(IntEnum):
219248
STANDARD = 0x00
220249
LARGE = 0x01
221-
DAOS = 0x02
222250

223251

224252
class RNTupleEnvelopeType(IntEnum):
@@ -230,10 +258,10 @@ class RNTupleEnvelopeType(IntEnum):
230258

231259
class RNTupleFieldRole(IntEnum):
232260
LEAF = 0x00
233-
VECTOR = 0x01
234-
STRUCT = 0x02
235-
UNION = 0x03
236-
UNSPLIT = 0x04
261+
COLLECTION = 0x01
262+
RECORD = 0x02
263+
VARIANT = 0x03
264+
STREAMER = 0x04
237265

238266

239267
class RNTupleFieldFlag(IntEnum):
@@ -243,20 +271,13 @@ class RNTupleFieldFlag(IntEnum):
243271

244272

245273
class RNTupleColumnFlag(IntEnum):
246-
DEFERRED = 0x08
247-
RANGE = 0x10
274+
DEFERRED = 0x01
275+
RANGE = 0x02
248276

249277

250278
class RNTupleExtraTypeIdentifier(IntEnum):
251279
ROOT = 0x00
252280

253281

254-
class RNTupleUserMetadataType(IntEnum):
255-
INT = 0x01
256-
BOOL = 0x02
257-
DOUBLE = 0x03
258-
STRING = 0x04
259-
260-
261282
class RNTupleClusterFlag(IntEnum):
262283
SHARDED = 0x01

0 commit comments

Comments
 (0)