Skip to content

Commit 1bf2b00

Browse files
committed
Remove xarray dependency
1 parent d6e9b82 commit 1bf2b00

File tree

2 files changed: +29 additions, -16 deletions

bio2zarr/bed2zarr.py

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,14 @@
77
import numcodecs
88
import numpy as np
99
import pandas as pd
10-
import xarray as xr
10+
import zarr
1111

12-
from . import core
12+
from . import core, provenance
1313

1414
logger = logging.getLogger(__name__)
1515

1616
DEFAULT_ZARR_COMPRESSOR = numcodecs.Blosc(cname="zstd", clevel=7)
17+
BED_ZARR_VERSION = 0.1
1718

1819

1920
class BedType(Enum):
@@ -200,18 +201,29 @@ def bed2zarr(
200201
fields = update_field_bounds(data, bed_type)
201202
dtypes = {f.name: f.smallest_dtype() for f in fields}
202203
data.index.name = "records"
203-
ds = xr.Dataset.from_dataframe(data)
204-
for k, v in dtypes.items():
205-
ds[k] = ds[k].astype(v)
206-
if records_chunk_size is None:
207-
records_chunk_size = len(data)
208-
chunks = {
209-
"records": records_chunk_size,
210-
"contigs": len(contig_id),
211-
}
212-
ds["contig_id"] = xr.DataArray(contig_id, dims=["contigs"])
204+
data = data.astype(dtypes)
205+
store = zarr.DirectoryStore(zarr_path)
206+
root = zarr.group(store=store)
207+
root.attrs.update(
208+
{
209+
"bed_zarr_version": f"{BED_ZARR_VERSION}",
210+
"source": f"bio2zarr-{provenance.__version__}",
211+
}
212+
)
213+
for field in fields[0 : bed_type.value]:
214+
if field.name == "strand":
215+
root.array(
216+
field.name,
217+
data[field.name].values,
218+
chunks=(records_chunk_size,),
219+
dtype="<U1",
220+
)
221+
else:
222+
root.array(
223+
field.name,
224+
data[field.name].values,
225+
chunks=(records_chunk_size,),
226+
)
227+
root.array("contig_id", contig_id, chunks=(len(contig_id),))
213228
if bed_type.value >= BedType.BED4.value:
214-
ds["name_id"] = xr.DataArray(name_id, dims=["names"])
215-
chunks["names"] = len(name_id)
216-
ds = ds.chunk(chunks)
217-
ds.to_zarr(zarr_path, mode="w")
229+
root.array("name_id", name_id, chunks=(len(name_id),))

tests/test_bed.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@ def test_bed2zarr(self, bed_path, bed_df, tmp_path, request):
8585
np.testing.assert_array_equal(root["thickStart"][:], bed_df[6].values)
8686
if bed_type.value >= bed2zarr.BedType.BED8.value:
8787
np.testing.assert_array_equal(root["thickEnd"][:], bed_df[7].values)
88+
print(zarr_path)
8889

8990

9091
class TestBedData:

0 commit comments

Comments
 (0)