Skip to content

Commit 39009fc

Browse files
authored
Merge pull request #142 from stvoutsin/votable-benchmarks-bitarray
Add converters benchmark and add Bitarray column test for votable
2 parents c5142b8 + 3247d53 commit 39009fc

File tree

2 files changed

+95
-6
lines changed

2 files changed

+95
-6
lines changed

benchmarks/votable.py

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
"""Benchmarks for VOTable binary/binary2 parsing performance."""
22
import io
3-
import os
4-
import tempfile
5-
63
import numpy as np
74
from astropy.io.votable import parse, from_table
85
from astropy.table import Table
96

107
np.random.seed(42)
8+
rng = np.random.default_rng(42)
119

1210
SMALL_SIZE = 1000
1311
LARGE_SIZE = 200000
@@ -20,6 +18,7 @@
2018
id_data = np.arange(LARGE_SIZE, dtype=np.int64)
2119
flag_data = np.random.choice([True, False], LARGE_SIZE)
2220
quality_data = np.random.randint(0, 256, LARGE_SIZE, dtype=np.uint8)
21+
bool_data = rng.integers(0, 2, LARGE_SIZE, dtype=bool)
2322

2423
short_names = np.array([f"OBJ_{i:08d}" for i in range(LARGE_SIZE)])
2524
filter_names = np.random.choice(['u', 'g', 'r', 'i', 'z', 'Y'], LARGE_SIZE)
@@ -32,9 +31,18 @@
3231
])
3332

3433

35-
def create_votable_bytes(
        table_data,
        format_type="binary2",
        bitarray_size=None):
    """Serialize a table to in-memory VOTable XML bytes.

    Parameters
    ----------
    table_data
        Table handed to ``from_table`` to build the VOTable document.
    format_type
        Value forwarded to ``to_xml`` as ``tabledata_format``
        (the benchmarks in this module use "binary" and "binary2").
    bitarray_size
        When not ``None``, every field of the first table whose datatype
        is "bit" gets ``arraysize`` set to ``str(bitarray_size)`` before
        the document is written.  Fields of any other datatype are left
        untouched.

    Returns
    -------
    bytes
        The complete XML document as written into an ``io.BytesIO``.
    """
    document = from_table(table_data)

    if bitarray_size is not None:
        # Lazily pick out the bit-typed fields; all others keep whatever
        # arraysize from_table assigned.
        bit_fields = (
            candidate
            for candidate in document.get_first_table().fields
            if candidate.datatype == "bit"
        )
        for bit_field in bit_fields:
            bit_field.arraysize = str(bitarray_size)

    # Write into memory so callers can time parsing without disk I/O.
    buffer = io.BytesIO()
    document.to_xml(buffer, tabledata_format=format_type)
    return buffer.getvalue()
@@ -57,8 +65,10 @@ def setup(self):
5765
names=['ra', 'dec', 'mag', 'flux', 'counts', 'id', 'quality']
5866
)
5967

60-
self.binary_data = create_votable_bytes(table, 'binary')
61-
self.binary2_data = create_votable_bytes(table, 'binary2')
68+
self.binary_data = create_votable_bytes(
69+
table, "binary", bitarray_size=8)
70+
self.binary2_data = create_votable_bytes(
71+
table, "binary2", bitarray_size=8)
6272

6373
def time_numeric_binary(self):
6474
parse(io.BytesIO(self.binary_data))
@@ -177,6 +187,48 @@ def time_booleans_binary2(self):
177187
parse(io.BytesIO(self.binary2_data))
178188

179189

190+
class TimeVOTableBitArrayOptimization:
    """Benchmark BitArray columns in Binary/Binary2 VOTables.

    ``setup`` serializes one table (three numeric columns plus four boolean
    flag columns) into four payloads: "binary" and "binary2", each with a
    requested bit-array size of 8 and 16.  Every ``time_*`` method parses
    one of those payloads from memory.

    NOTE(review): ``create_votable_bytes`` only overrides ``arraysize`` on
    fields whose datatype is "bit".  Confirm that ``from_table`` produces
    such fields for these bool columns; otherwise the 8 and 16 variants
    serialize identically.
    """

    def setup(self):
        """Build the table and pre-serialize it so only parsing is timed."""
        # A local seeded generator gives the same flag columns on every
        # setup() call, no matter how much module-level random state has
        # already been consumed.
        rng = np.random.default_rng(42)

        table = Table(
            {
                "ra": ra_data[:LARGE_SIZE],
                "dec": dec_data[:LARGE_SIZE],
                "mag": mag_data[:LARGE_SIZE],
                "detected": rng.integers(0, 2, LARGE_SIZE, dtype=bool),
                "saturated": rng.integers(0, 2, LARGE_SIZE, dtype=bool),
                "edge_pixel": rng.integers(0, 2, LARGE_SIZE, dtype=bool),
                "cosmic_ray": rng.integers(0, 2, LARGE_SIZE, dtype=bool),
            }
        )

        # bitarray_size is passed by keyword as an int; the helper applies
        # str() itself before storing it on the field.
        self.binary_bitarray_8_data = create_votable_bytes(
            table, "binary", bitarray_size=8)
        self.binary_bitarray_16_data = create_votable_bytes(
            table, "binary", bitarray_size=16)
        self.binary2_bitarray_8_data = create_votable_bytes(
            table, "binary2", bitarray_size=8)
        self.binary2_bitarray_16_data = create_votable_bytes(
            table, "binary2", bitarray_size=16)

    def time_bitarray_8bit_binary(self):
        """Parse the "binary" payload built with bit-array size 8."""
        parse(io.BytesIO(self.binary_bitarray_8_data))

    def time_bitarray_16bit_binary(self):
        """Parse the "binary" payload built with bit-array size 16."""
        parse(io.BytesIO(self.binary_bitarray_16_data))

    def time_bitarray_8bit_binary2(self):
        """Parse the "binary2" payload built with bit-array size 8."""
        parse(io.BytesIO(self.binary2_bitarray_8_data))

    def time_bitarray_16bit_binary2(self):
        """Parse the "binary2" payload built with bit-array size 16."""
        parse(io.BytesIO(self.binary2_bitarray_16_data))
230+
231+
180232
class TimeVOTableMixed:
181233
"""Benchmark for a table with mixed fields types."""
182234

benchmarks/votable_converters.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import numpy as np
2+
import numpy.ma as ma
3+
from astropy.io.votable.converters import bool_to_bitarray, bitarray_to_bool
4+
5+
SMALL_SIZE = 1000
6+
LARGE_SIZE = 100000
7+
8+
9+
class TimeBitArrayConverters:
    """Time ``bool_to_bitarray`` and ``bitarray_to_bool`` called directly."""

    def setup(self):
        """Create the boolean inputs and their packed counterparts."""
        generator = np.random.default_rng(42)

        # Draws happen in a fixed order (small, large, mask) so the seeded
        # generator yields the same inputs on every call.
        small_flags = generator.integers(0, 2, SMALL_SIZE, dtype=bool)
        large_flags = generator.integers(0, 2, LARGE_SIZE, dtype=bool)
        # Each element is masked when its uniform draw falls below 0.2.
        hidden = generator.random(LARGE_SIZE) < 0.2

        self.small_bool = small_flags
        self.large_bool = large_flags
        self.masked_bool = ma.array(large_flags, mask=hidden)

        # Packed forms are prepared here so the decode benchmarks do not
        # include the encoding cost.
        self.small_bits = bool_to_bitarray(small_flags)
        self.large_bits = bool_to_bitarray(large_flags)

    def time_bool_to_bitarray_small(self):
        """Pack the SMALL_SIZE boolean array."""
        bool_to_bitarray(self.small_bool)

    def time_bool_to_bitarray_large(self):
        """Pack the LARGE_SIZE boolean array."""
        bool_to_bitarray(self.large_bool)

    def time_bool_to_bitarray_masked(self):
        """Pack the masked LARGE_SIZE boolean array."""
        bool_to_bitarray(self.masked_bool)

    def time_bitarray_to_bool_small(self):
        """Unpack the small packed array back to its original length."""
        flag_count = len(self.small_bool)
        bitarray_to_bool(self.small_bits, flag_count)

    def time_bitarray_to_bool_large(self):
        """Unpack the large packed array back to its original length."""
        flag_count = len(self.large_bool)
        bitarray_to_bool(self.large_bits, flag_count)

0 commit comments

Comments
 (0)