Skip to content

Backport PR #3643 on branch 1.11.x ((fix): sc.read with extension does not warn unnecessarily) #3661

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/3643.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed handling of the `ext` argument in {func}`scanpy.read` {smaller}`I Gold`
51 changes: 35 additions & 16 deletions src/scanpy/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
from functools import partial
from pathlib import Path, PurePath
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, overload

import anndata.utils
import h5py
Expand Down Expand Up @@ -33,6 +33,7 @@
read_mtx,
read_text,
)

from anndata import AnnData
from matplotlib.image import imread

Expand Down Expand Up @@ -140,7 +141,7 @@ def read(

"""
filename = Path(filename) # allow passing strings
if is_valid_filename(filename):
if is_valid_filename(filename, ext=ext):
return _read(
filename,
backed=backed,
Expand Down Expand Up @@ -762,7 +763,7 @@ def _read( # noqa: PLR0912, PLR0915
msg = f"Please provide one of the available extensions.\n{avail_exts}"
raise ValueError(msg)
else:
ext = is_valid_filename(filename, return_ext=True)
ext = is_valid_filename(filename, return_ext=True, ext=ext)
is_present = _check_datafile_present_and_download(filename, backup_url=backup_url)
if not is_present:
logg.debug(f"... did not find original file {filename}")
Expand Down Expand Up @@ -1066,25 +1067,43 @@ def _check_datafile_present_and_download(path: Path, backup_url=None):
return True


def is_valid_filename(filename: Path, *, return_ext: bool = False):
@overload
def is_valid_filename(
filename: Path, *, return_ext: Literal[False] = False, ext: str | None = None
) -> bool: ...
@overload
def is_valid_filename(
filename: Path, *, return_ext: Literal[True], ext: str | None = None
) -> str: ...
def is_valid_filename(
filename: Path, *, return_ext: bool = False, ext: str | None = None
) -> str | bool:
"""Check whether the argument is a filename."""
ext = filename.suffixes

if len(ext) > 2:
ext_from_file = filename.suffixes
if ext is not None:
if not (joined_file_ext := ".".join(ext_from_file)).endswith(ext):
msg = f"{joined_file_ext} does not end in expected extension {ext}"
raise ValueError(msg)
return ext if return_ext else True
if len(ext_from_file) > 2:
logg.warning(
f"Your filename has more than two extensions: {ext}.\n"
f"Only considering the two last: {ext[-2:]}."
f"Your filename has more than two extensions: {ext_from_file}.\n"
f"Only considering the two last: {ext_from_file[-2:]}."
)
ext = ext[-2:]
ext_from_file = ext_from_file[-2:]

# cases for gzipped/bzipped text files
if len(ext) == 2 and ext[0][1:] in text_exts and ext[1][1:] in ("gz", "bz2"):
return ext[0][1:] if return_ext else True
elif ext and ext[-1][1:] in avail_exts:
return ext[-1][1:] if return_ext else True
elif "".join(ext) == ".soft.gz":
if (
len(ext_from_file) == 2
and ext_from_file[0][1:] in text_exts
and ext_from_file[1][1:] in ("gz", "bz2")
):
return ext_from_file[0][1:] if return_ext else True
elif ext_from_file and ext_from_file[-1][1:] in avail_exts:
return ext_from_file[-1][1:] if return_ext else True
elif "".join(ext_from_file) == ".soft.gz":
return "soft.gz" if return_ext else True
elif "".join(ext) == ".mtx.gz":
elif "".join(ext_from_file) == ".mtx.gz":
return "mtx.gz" if return_ext else True
elif not return_ext:
return False
Expand Down
12 changes: 12 additions & 0 deletions tests/test_readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

from pathlib import PurePosixPath, PureWindowsPath

import numpy as np
import pytest
from anndata import AnnData

import scanpy as sc
from scanpy.readwrite import _slugify


Expand All @@ -20,3 +23,12 @@
)
def test_slugify(path):
assert _slugify(path) == "C-foo-bar"


def test_read_ext_match(tmp_path):
adata_path = tmp_path / "foo.bar.anndata.h5ad"
AnnData(np.array([[1, 2], [3, 4]])).write_h5ad(adata_path)
with pytest.raises(ValueError, match="does not end in expected extension"):
sc.read(adata_path, ext="zarr")
# should not warn: https://github.com/scverse/scanpy/issues/2288
sc.read(adata_path, ext="h5ad")
Loading