Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: cleaning up reader #141

Merged
merged 8 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,25 @@ or from conda:
conda install -c conda-forge nd2
```

### extras
### Legacy nd2 file support

Legacy nd2 (JPEG2000) files are also supported, but require `imagecodecs`. To install with support for these files use:
Legacy nd2 (JPEG2000) files are also supported, but require `imagecodecs`. To
install with support for these files use the `legacy` extra:

```sh
pip install nd2[legacy]
```

### Faster XML parsing

Much of the metadata in the file is stored as XML. If found in the environment,
`nd2` will use [`lxml`](https://pypi.org/project/lxml/) which is much faster
than the built-in `xml` module. To install with support for `lxml` use:

```sh
pip install nd2 lxml
```

## usage and API

```python
Expand Down
44 changes: 19 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "Yet another nd2 (Nikon NIS Elements) file reader"
readme = "README.md"
requires-python = ">=3.7"
license = { text = "BSD 3-Clause License" }
authors = [{ email = "talley.lambert@gmail.com" }, { name = "Talley Lambert" }]
authors = [{ email = "talley.lambert@gmail.com", name = "Talley Lambert" }]
classifiers = [
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: BSD License",
Expand Down Expand Up @@ -71,35 +71,27 @@ version-file = "src/nd2/_version.py"
only-include = ["src"]
sources = ["src"]

# https://pycqa.github.io/isort/docs/configuration/options.html
[tool.isort]
profile = "black"
src_paths = ["src/nd2", "tests"]

# https://github.com/charliermarsh/ruff
# https://beta.ruff.rs/docs/rules/
[tool.ruff]
line-length = 88
target-version = "py37"
src = ["src/nd2", "tests"]
select = [
"E", # style errors
"F", # flakes
"D", # pydocstyle
"I", # isort
"UP", # pyupgrade
"S", # bandit
"C", # flake8-comprehensions
"B", # flake8-bugbear
"A001", # flake8-builtins
"RUF", # ruff-specific rules
"TCH", # flake8-type-checking
"E", # style errors
"F", # flakes
"D", # pydocstyle
"I", # isort
"UP", # pyupgrade
"S", # bandit
"C4", # flake8-comprehensions
"B", # flake8-bugbear
"A001", # flake8-builtins
"RUF", # ruff-specific rules
"SIM105", # contextlib.suppress
"TID", # tidy imports
"TCH", # flake8-type-checking
]
ignore = [
# these should be fixed
"D101",
"D105",
"D103",
###
"D100", # Missing docstring in public module
"D107", # Missing docstring in __init__
"D203", # 1 blank line required before class docstring
Expand All @@ -113,13 +105,14 @@ ignore = [
]

[tool.ruff.per-file-ignores]
"src/nd2/structures.py" = ["D101", "D105"] # Fix someday
"tests/*.py" = ["D", "S"]
"scripts/*.py" = ["D", "S"]

# https://docs.pytest.org/en/6.2.x/customize.html
[tool.pytest.ini_options]
minversion = "6.0"
addopts = '--color=yes'
addopts = '--color=yes --cov-config=pyproject.toml'
testpaths = ["tests"]
filterwarnings = [
"error",
Expand All @@ -143,6 +136,7 @@ ignore_missing_imports = true

# https://coverage.readthedocs.io/en/6.4/config.html
[tool.coverage.report]
show_missing = true
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
Expand All @@ -153,7 +147,7 @@ exclude_lines = [
]

[tool.coverage.run]
omit = ["tests"]
source = ["src"]

# https://github.com/mgedmin/check-manifest#configuration
[tool.check-manifest]
Expand Down
6 changes: 3 additions & 3 deletions scripts/gather.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""gather metadata from all files in test/data with all nd readers."""
import contextlib
import json
from pathlib import Path

Expand All @@ -19,10 +20,9 @@ def get_nd2_stats(file) -> dict:
d["pixel_size"] = m.channels[0].volume.axesCalibration
d["shape"] = fh.shape
d["axes"] = fh.axes
try:
with contextlib.suppress(Exception):
d["dtype"] = str(fh.dtype)
except Exception:
pass

fh.close()
return d

Expand Down
22 changes: 11 additions & 11 deletions src/nd2/_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def asarray(self) -> np.ndarray:
@classmethod
def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None:
"""Extract binary layers from an ND2 file."""
if nd2file.is_legacy:
if nd2file.is_legacy: # pragma: no cover
warnings.warn(
"`binary_data` is not supported for legacy ND2 files",
UserWarning,
Expand All @@ -166,30 +166,31 @@ def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None:
return None
rdr = cast("LatestSDKReader", nd2file._rdr)

binary_meta = rdr._decoded_custom_data_chunk(
b"BinaryMetadata_v1!", strip_prefix=True
)

if not binary_meta:
try:
binary_meta = rdr._decode_chunk(
b"CustomDataVar|BinaryMetadata_v1!", strip_prefix=True
)
except KeyError:
return None

try:
items: dict = binary_meta["BinaryMetadata_v1"]
except KeyError:
except KeyError: # pragma: no cover
warnings.warn(
"Could not find 'BinaryMetadata_v1' tag, please open an "
"issue with this file at https://github.com/tlambert03/nd2/issues/new",
stacklevel=2,
)
return None

binseqs = sorted(x for x in rdr._meta_map if "RleZipBinarySequence" in x)
binseqs = sorted(x for x in rdr.chunkmap if b"RleZipBinarySequence" in x)
mask_items = []
for _, item in sorted(items.items()):
key = item["FileTag"]
key = item["FileTag"].encode()
_masks: list[np.ndarray | None] = []
for bs in binseqs:
if key in bs:
data = rdr._load_chunk(f"{bs}!".encode())[4:]
data = rdr._load_chunk(bs)[4:]
_masks.append(_decode_binary_mask(data) if data else None)
mask_items.append(
BinaryLayer(
Expand All @@ -216,7 +217,6 @@ def _unpack(stream: io.BufferedIOBase, strct: struct.Struct) -> tuple:
def _decode_binary_mask(data: bytes, dtype: DTypeLike = "uint16") -> np.ndarray:
# this receives data as would be extracted from a
# `CustomDataSeq|RleZipBinarySequence...` section in the metadata
# data = f._rdr._get_meta_chunk('CustomDataSeq|RleZipBinarySequence_1_v1|0')[:4]

# NOTE it is up to ND2File to strip the first 4 bytes... and not call this if there
# is no data (i.e. if the chunk is just '\x00')
Expand Down
2 changes: 2 additions & 0 deletions src/nd2/_clx_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def _chunk_name_and_dtype(

data_type, name_length = strctBB.unpack(header)
if data_type == ELxLiteVariantType.COMPRESS:
# NOTE: the rois.nd2 test file has compressed metadata
# in b'CustomData|CustomDescriptionV1_0!'
raise NotImplementedError("Compressed metadata not yet implemented.")
if data_type in (ELxLiteVariantType.DEPRECATED, ELxLiteVariantType.UNKNOWN):
raise ValueError(f"Unknown data type in metadata header: {data_type}")
Expand Down
25 changes: 22 additions & 3 deletions src/nd2/_clx_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
import lxml.etree

Element = Union[xml.etree.ElementTree.Element, lxml.etree._Element]
Parser = Callable[[bytes], Element]
Parser = Callable[[bytes | str], Element]
Scalar = Union[float, str, int, bytearray, bool]
JsonValue = Union[Scalar, dict[str, "JsonValue"]]
XML: Parser
ParseError: Exception

else:
try:
Expand Down Expand Up @@ -73,7 +74,15 @@ def json_from_clx_variant(
on the XML structure. (A <variant><no_name>...</no_name></variant> is the most
likely case where a scalar is returned.)
"""
node = parser(bxml.split(b"?>", 1)[-1]) # strip xml header
if bxml.startswith(b"<?xml"):
bxml = bxml.split(b"?>", 1)[-1] # strip xml header

try:
node = parser(bxml)
except SyntaxError: # when there are invalid characters in the XML
# could go straight to this ... not sure if it's slower
node = parser(bxml.decode(encoding="utf-8", errors="ignore"))

is_legacy = node.attrib.get("_VERSION") == "1.000000"
name, val = _node_name_value(node, strip_prefix, include_attrs=is_legacy)

Expand Down Expand Up @@ -123,7 +132,17 @@ def _node_name_value(
# NOTE: "no_name" is the standard name for a list-type node
# "BinaryItem" is a special case found in the BinaryMetadata_v1 tag...
# without special handling, you would only get the last item in the list
if cname in ("no_name", None, "", "BinaryItem", "TextInfoItem"):
# FIXME: handle the special cases below "" better.
if cname in (
"no_name",
None,
"",
"BinaryItem",
"TextInfoItem",
"Wavelength",
"MinSrc",
"MaxSrc",
):
if not cval:
# skip empty nodes ... the sdk does this too
continue
Expand Down
5 changes: 3 additions & 2 deletions src/nd2/_legacy/_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@

import numpy as np

from .. import structures as strct
from .._util import AXIS, VoxelSize
from nd2 import structures as strct
from nd2._util import AXIS, VoxelSize

from ._legacy_xml import parse_xml_block

if TYPE_CHECKING:
Expand Down
30 changes: 16 additions & 14 deletions src/nd2/_pysdk/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
RawAttributesDict,
RawExperimentDict,
RawMetaDict,
RawTextInfoDict,
SpectLoopPars,
SpectrumDict,
TimeLoopPars,
Expand Down Expand Up @@ -237,7 +238,7 @@ def _load_single_experiment_loop(
count = loop_params.get("pPlanes", {}).get("uiCount", count)
return strct.SpectLoop(count=count)

raise NotImplementedError(
raise NotImplementedError( # pragma: no cover
f"We've never seen a file like this! (loop_type={loop_type!r}). We'd "
"appreciate it if you would submit this file at "
"https://github.com/tlambert03/nd2/issues/new",
Expand Down Expand Up @@ -347,28 +348,29 @@ def _get_spectrum_max(item: SpectrumDict | None) -> float:
return max(spectrum, key=lambda x: x[0])[1] if spectrum else 0.0


def load_text_info(src: dict) -> strct.TextInfo:
# we only want keys that are present in the src
def load_text_info(raw_txt_info: RawTextInfoDict) -> strct.TextInfo:
# we only want keys that are present in the raw_txt_info

out = {
key: src[lookup]
key: raw_txt_info.get(lookup)
for key, lookup in (
("appVersion", "TextInfoItem_14"),
("imageId", "TextInfoItem_0"),
("type", "TextInfoItem_1"),
("group", "TextInfoItem_2"),
("sampleId", "TextInfoItem_3"),
("author", "TextInfoItem_4"),
("description", "TextInfoItem_5"),
("capturing", "TextInfoItem_6"),
("conclusion", "TextInfoItem_10"),
("sampling", "TextInfoItem_7"),
("location", "TextInfoItem_8"),
("date", "TextInfoItem_9"),
("description", "TextInfoItem_5"),
("group", "TextInfoItem_2"),
("imageId", "TextInfoItem_0"),
("conclusion", "TextInfoItem_10"),
("info1", "TextInfoItem_11"),
("info2", "TextInfoItem_12"),
("location", "TextInfoItem_8"),
("optics", "TextInfoItem_13"),
("sampleId", "TextInfoItem_3"),
("sampling", "TextInfoItem_7"),
("type", "TextInfoItem_1"),
("appVersion", "TextInfoItem_14"),
)
if src.get(lookup)
if raw_txt_info.get(lookup)
}
return cast(strct.TextInfo, out)

Expand Down
Loading