Skip to content

Possible race condition in _concat_zarrs_optimized #388

Open
@tomwhite

Description

@tomwhite
================================== FAILURES ===================================
_________________ test_vcfzarr_to_zarr[None-True-True-False] __________________

shared_datadir = WindowsPath('C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/data')
tmpdir = local('C:\\Users\\runneradmin\\AppData\\Local\\Temp\\pytest-of-runneradmin\\pytest-0\\test_vcfzarr_to_zarr_None_True1')
grouped_by_contig = True, consolidated = True, has_variant_id = False
concat_algorithm = None

    @pytest.mark.parametrize(
        "grouped_by_contig, consolidated, has_variant_id",
        [
            (False, False, False),
            (False, False, True),
            (True, False, True),
            (True, True, False),
        ],
    )
    @pytest.mark.parametrize(
        "concat_algorithm",
        [None, "xarray_internal"],
    )
    def test_vcfzarr_to_zarr(
        shared_datadir,
        tmpdir,
        grouped_by_contig,
        consolidated,
        has_variant_id,
        concat_algorithm,
    ):
        if has_variant_id:
            fields = None
        else:
            fields = [
                "variants/CHROM",
                "variants/POS",
                "variants/REF",
                "variants/ALT",
                "calldata/GT",
                "samples",
            ]
    
        vcfzarr_path = create_vcfzarr(  # type: ignore[no-untyped-call]
            shared_datadir,
            tmpdir,
            fields=fields,
            grouped_by_contig=grouped_by_contig,
            consolidated=consolidated,
        )
    
        output = str(tmpdir / "vcf.zarr")
>       vcfzarr_to_zarr(
            vcfzarr_path,
            output,
            grouped_by_contig=grouped_by_contig,
            concat_algorithm=concat_algorithm,
            consolidated=consolidated,
        )

sgkit\tests\test_vcfzarr_reader.py:135: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sgkit\io\vcfzarr_reader.py:134: in vcfzarr_to_zarr
    _concat_zarrs_optimized(
sgkit\io\vcfzarr_reader.py:259: in _concat_zarrs_optimized
    output_zarr[var].attrs.update(attrs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <zarr.hierarchy.Group '/'>, item = 'variant_allele'

    def __getitem__(self, item):
        """Obtain a group member.
    
        Parameters
        ----------
        item : string
            Member name or path.
    
        Examples
        --------
        >>> import zarr
        >>> g1 = zarr.group()
        >>> d1 = g1.create_dataset('foo/bar/baz', shape=100, chunks=10)
        >>> g1['foo']
        <zarr.hierarchy.Group '/foo'>
        >>> g1['foo/bar']
        <zarr.hierarchy.Group '/foo/bar'>
        >>> g1['foo/bar/baz']
        <zarr.core.Array '/foo/bar/baz' (100,) float64>
    
        """
        path = self._item_path(item)
        if contains_array(self._store, path):
            return Array(self._store, read_only=self._read_only, path=path,
                         chunk_store=self._chunk_store,
                         synchronizer=self._synchronizer, cache_attrs=self.attrs.cache)
        elif contains_group(self._store, path):
            return Group(self._store, read_only=self._read_only, path=path,
                         chunk_store=self._chunk_store, cache_attrs=self.attrs.cache,
                         synchronizer=self._synchronizer)
        else:
>           raise KeyError(item)
E           KeyError: 'variant_allele'

c:\miniconda3\envs\test\lib\site-packages\zarr\hierarchy.py:349: KeyError
---------------------------- Captured stderr call -----------------------------
Exception ignored in: <_io.FileIO name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/call_genotype_mask/.zarray' mode='rb' closefd=True>
Traceback (most recent call last):
  File "c:\miniconda3\envs\test\lib\site-packages\fsspec\spec.py", line 626, in cat_file
    return self.open(path, "rb").read()
ResourceWarning: unclosed file <_io.BufferedReader name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/call_genotype_mask/.zarray'>
Exception ignored in: <_io.FileIO name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/variant_position/.zarray' mode='rb' closefd=True>
Traceback (most recent call last):
  File "c:\miniconda3\envs\test\lib\site-packages\fsspec\spec.py", line 626, in cat_file
    return self.open(path, "rb").read()
ResourceWarning: unclosed file <_io.BufferedReader name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/variant_position/.zarray'>
Exception ignored in: <_io.FileIO name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/call_genotype/.zarray' mode='rb' closefd=True>
Traceback (most recent call last):
  File "c:\miniconda3\envs\test\lib\site-packages\fsspec\spec.py", line 626, in cat_file
    return self.open(path, "rb").read()
ResourceWarning: unclosed file <_io.BufferedReader name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/call_genotype/.zarray'>
Exception ignored in: <_io.FileIO name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/variant_contig/.zarray' mode='rb' closefd=True>
Traceback (most recent call last):
  File "c:\miniconda3\envs\test\lib\site-packages\fsspec\spec.py", line 626, in cat_file
    return self.open(path, "rb").read()
ResourceWarning: unclosed file <_io.BufferedReader name='C:/Users/runneradmin/AppData/Local/Temp/pytest-of-runneradmin/pytest-0/test_vcfzarr_to_zarr_None_True1/vcf.zarr/variant_contig/.zarray'>
=========================== short test summary info ===========================
FAILED sgkit/tests/test_vcfzarr_reader.py::test_vcfzarr_to_zarr[None-True-True-False]
====== 1 failed, 480 passed, 13 skipped, 2 xfailed in 198.28s (0:03:18) =======
Error: Process completed with exit code 1.

Metadata

Metadata

Assignees

No one assigned

    Labels

    IO: Issues related to reading and writing common third-party file formats

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions