Skip to content

Commit 5f43ecc

Browse files
tomwhite authored and mergify[bot] committed
Factor out DEFAULT_COMPRESSOR. Gracefully handle Blosc import errors.
1 parent d99ea67 commit 5f43ecc

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

sgkit/io/vcf/vcf_reader.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import functools
22
import itertools
3+
import warnings
34
from contextlib import contextmanager
45
from dataclasses import dataclass
56
from pathlib import Path
@@ -20,7 +21,7 @@
2021
import numpy as np
2122
import xarray as xr
2223
from cyvcf2 import VCF, Variant
23-
from numcodecs import Blosc, PackBits
24+
from numcodecs import PackBits
2425

2526
from sgkit import variables
2627
from sgkit.io.utils import zarrs_to_dataset
@@ -40,6 +41,14 @@
4041
3 # equivalent to DEFAULT_ALT_NUMBER in vcf_read.py in scikit_allel
4142
)
4243

44+
try:
45+
from numcodecs import Blosc
46+
47+
DEFAULT_COMPRESSOR = Blosc(cname="zstd", clevel=7, shuffle=Blosc.AUTOSHUFFLE)
48+
except ImportError: # pragma: no cover
49+
warnings.warn("Cannot import Blosc, falling back to no compression", RuntimeWarning)
50+
DEFAULT_COMPRESSOR = None
51+
4352

4453
@contextmanager
4554
def open_vcf(path: PathType) -> Iterator[VCF]:
@@ -332,9 +341,7 @@ def vcf_to_zarr_sequential(
332341
region: Optional[str] = None,
333342
chunk_length: int = 10_000,
334343
chunk_width: int = 1_000,
335-
compressor: Optional[Any] = Blosc(
336-
cname="zstd", clevel=7, shuffle=Blosc.AUTOSHUFFLE
337-
),
344+
compressor: Optional[Any] = DEFAULT_COMPRESSOR,
338345
encoding: Optional[Any] = None,
339346
ploidy: int = 2,
340347
mixed_ploidy: bool = False,
@@ -478,7 +485,6 @@ def get_chunk_size(dim: Hashable, size: int) -> int:
478485
# values from function args (encoding) take precedence over default_encoding
479486
encoding = encoding or {}
480487
merged_encoding = {**default_encoding, **encoding}
481-
print(merged_encoding)
482488

483489
ds.to_zarr(output, mode="w", encoding=merged_encoding)
484490
first_variants_chunk = False
@@ -493,9 +499,7 @@ def vcf_to_zarr_parallel(
493499
regions: Union[None, Sequence[str], Sequence[Optional[Sequence[str]]]],
494500
chunk_length: int = 10_000,
495501
chunk_width: int = 1_000,
496-
compressor: Optional[Any] = Blosc(
497-
cname="zstd", clevel=7, shuffle=Blosc.AUTOSHUFFLE
498-
),
502+
compressor: Optional[Any] = DEFAULT_COMPRESSOR,
499503
encoding: Optional[Any] = None,
500504
temp_chunk_length: Optional[int] = None,
501505
tempdir: Optional[PathType] = None,
@@ -549,9 +553,7 @@ def vcf_to_zarrs(
549553
regions: Union[None, Sequence[str], Sequence[Optional[Sequence[str]]]],
550554
chunk_length: int = 10_000,
551555
chunk_width: int = 1_000,
552-
compressor: Optional[Any] = Blosc(
553-
cname="zstd", clevel=7, shuffle=Blosc.AUTOSHUFFLE
554-
),
556+
compressor: Optional[Any] = DEFAULT_COMPRESSOR,
555557
encoding: Optional[Any] = None,
556558
output_storage_options: Optional[Dict[str, str]] = None,
557559
ploidy: int = 2,
@@ -690,9 +692,7 @@ def vcf_to_zarr(
690692
regions: Union[None, Sequence[str], Sequence[Optional[Sequence[str]]]] = None,
691693
chunk_length: int = 10_000,
692694
chunk_width: int = 1_000,
693-
compressor: Optional[Any] = Blosc(
694-
cname="zstd", clevel=7, shuffle=Blosc.AUTOSHUFFLE
695-
),
695+
compressor: Optional[Any] = DEFAULT_COMPRESSOR,
696696
encoding: Optional[Any] = None,
697697
temp_chunk_length: Optional[int] = None,
698698
tempdir: Optional[PathType] = None,

0 commit comments

Comments
 (0)