Skip to content

Commit

Permalink
refactor: cleaning up reader (#141)
Browse files Browse the repository at this point in the history
* refactor: cleaning up reader

* cleanup roi read

* update docs and ruff rules

* update readme

* coverage

* coverage

* add note

* add pragma
  • Loading branch information
tlambert03 authored Jun 15, 2023
1 parent 0d1242b commit 9b91e1f
Show file tree
Hide file tree
Showing 19 changed files with 397 additions and 347 deletions.
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,25 @@ or from conda:
conda install -c conda-forge nd2
```

### extras
### Legacy nd2 file support

Legacy nd2 (JPEG2000) files are also supported, but require `imagecodecs`. To install with support for these files use:
Legacy nd2 (JPEG2000) files are also supported, but require `imagecodecs`. To
install with support for these files use the `legacy` extra:

```sh
pip install nd2[legacy]
```

### Faster XML parsing

Much of the metadata in the file is stored as XML. If found in the environment,
`nd2` will use [`lxml`](https://pypi.org/project/lxml/) which is much faster
than the built-in `xml` module. To install with support for `lxml` use:

```sh
pip install nd2 lxml
```

## usage and API

```python
Expand Down
44 changes: 19 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "Yet another nd2 (Nikon NIS Elements) file reader"
readme = "README.md"
requires-python = ">=3.7"
license = { text = "BSD 3-Clause License" }
authors = [{ email = "talley.lambert@gmail.com" }, { name = "Talley Lambert" }]
authors = [{ email = "talley.lambert@gmail.com", name = "Talley Lambert" }]
classifiers = [
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: BSD License",
Expand Down Expand Up @@ -71,35 +71,27 @@ version-file = "src/nd2/_version.py"
only-include = ["src"]
sources = ["src"]

# https://pycqa.github.io/isort/docs/configuration/options.html
[tool.isort]
profile = "black"
src_paths = ["src/nd2", "tests"]

# https://github.com/charliermarsh/ruff
# https://beta.ruff.rs/docs/rules/
[tool.ruff]
line-length = 88
target-version = "py37"
src = ["src/nd2", "tests"]
select = [
"E", # style errors
"F", # flakes
"D", # pydocstyle
"I", # isort
"UP", # pyupgrade
"S", # bandit
"C", # flake8-comprehensions
"B", # flake8-bugbear
"A001", # flake8-builtins
"RUF", # ruff-specific rules
"TCH", # flake8-type-checking
"E", # style errors
"F", # flakes
"D", # pydocstyle
"I", # isort
"UP", # pyupgrade
"S", # bandit
"C4", # flake8-comprehensions
"B", # flake8-bugbear
"A001", # flake8-builtins
"RUF", # ruff-specific rules
"SIM105", # contextlib.suppress
"TID", # tidy imports
"TCH", # flake8-type-checking
]
ignore = [
# these should be fixed
"D101",
"D105",
"D103",
###
"D100", # Missing docstring in public module
"D107", # Missing docstring in __init__
"D203", # 1 blank line required before class docstring
Expand All @@ -113,13 +105,14 @@ ignore = [
]

[tool.ruff.per-file-ignores]
"src/nd2/structures.py" = ["D101", "D105"] # Fix someday
"tests/*.py" = ["D", "S"]
"scripts/*.py" = ["D", "S"]

# https://docs.pytest.org/en/6.2.x/customize.html
[tool.pytest.ini_options]
minversion = "6.0"
addopts = '--color=yes'
addopts = '--color=yes --cov-config=pyproject.toml'
testpaths = ["tests"]
filterwarnings = [
"error",
Expand All @@ -143,6 +136,7 @@ ignore_missing_imports = true

# https://coverage.readthedocs.io/en/6.4/config.html
[tool.coverage.report]
show_missing = true
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
Expand All @@ -153,7 +147,7 @@ exclude_lines = [
]

[tool.coverage.run]
omit = ["tests"]
source = ["src"]

# https://github.com/mgedmin/check-manifest#configuration
[tool.check-manifest]
Expand Down
6 changes: 3 additions & 3 deletions scripts/gather.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""gather metadata from all files in test/data with all nd readers."""
import contextlib
import json
from pathlib import Path

Expand All @@ -19,10 +20,9 @@ def get_nd2_stats(file) -> dict:
d["pixel_size"] = m.channels[0].volume.axesCalibration
d["shape"] = fh.shape
d["axes"] = fh.axes
try:
with contextlib.suppress(Exception):
d["dtype"] = str(fh.dtype)
except Exception:
pass

fh.close()
return d

Expand Down
22 changes: 11 additions & 11 deletions src/nd2/_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def asarray(self) -> np.ndarray:
@classmethod
def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None:
"""Extract binary layers from an ND2 file."""
if nd2file.is_legacy:
if nd2file.is_legacy: # pragma: no cover
warnings.warn(
"`binary_data` is not supported for legacy ND2 files",
UserWarning,
Expand All @@ -166,30 +166,31 @@ def from_nd2file(cls, nd2file: ND2File) -> BinaryLayers | None:
return None
rdr = cast("LatestSDKReader", nd2file._rdr)

binary_meta = rdr._decoded_custom_data_chunk(
b"BinaryMetadata_v1!", strip_prefix=True
)

if not binary_meta:
try:
binary_meta = rdr._decode_chunk(
b"CustomDataVar|BinaryMetadata_v1!", strip_prefix=True
)
except KeyError:
return None

try:
items: dict = binary_meta["BinaryMetadata_v1"]
except KeyError:
except KeyError: # pragma: no cover
warnings.warn(
"Could not find 'BinaryMetadata_v1' tag, please open an "
"issue with this file at https://github.com/tlambert03/nd2/issues/new",
stacklevel=2,
)
return None

binseqs = sorted(x for x in rdr._meta_map if "RleZipBinarySequence" in x)
binseqs = sorted(x for x in rdr.chunkmap if b"RleZipBinarySequence" in x)
mask_items = []
for _, item in sorted(items.items()):
key = item["FileTag"]
key = item["FileTag"].encode()
_masks: list[np.ndarray | None] = []
for bs in binseqs:
if key in bs:
data = rdr._load_chunk(f"{bs}!".encode())[4:]
data = rdr._load_chunk(bs)[4:]
_masks.append(_decode_binary_mask(data) if data else None)
mask_items.append(
BinaryLayer(
Expand All @@ -216,7 +217,6 @@ def _unpack(stream: io.BufferedIOBase, strct: struct.Struct) -> tuple:
def _decode_binary_mask(data: bytes, dtype: DTypeLike = "uint16") -> np.ndarray:
# this receives data as would be extracted from a
# `CustomDataSeq|RleZipBinarySequence...` section in the metadata
# data = f._rdr._get_meta_chunk('CustomDataSeq|RleZipBinarySequence_1_v1|0')[:4]

# NOTE it is up to ND2File to strip the first 4 bytes... and not call this if there
# is no data (i.e. if the chunk is just '\x00')
Expand Down
2 changes: 2 additions & 0 deletions src/nd2/_clx_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def _chunk_name_and_dtype(

data_type, name_length = strctBB.unpack(header)
if data_type == ELxLiteVariantType.COMPRESS:
# NOTE: the rois.nd2 test file has compressed metadata
# in b'CustomData|CustomDescriptionV1_0!'
raise NotImplementedError("Compressed metadata not yet implemented.")
if data_type in (ELxLiteVariantType.DEPRECATED, ELxLiteVariantType.UNKNOWN):
raise ValueError(f"Unknown data type in metadata header: {data_type}")
Expand Down
25 changes: 22 additions & 3 deletions src/nd2/_clx_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
import lxml.etree

Element = Union[xml.etree.ElementTree.Element, lxml.etree._Element]
Parser = Callable[[bytes], Element]
Parser = Callable[[bytes | str], Element]
Scalar = Union[float, str, int, bytearray, bool]
JsonValue = Union[Scalar, dict[str, "JsonValue"]]
XML: Parser
ParseError: Exception

else:
try:
Expand Down Expand Up @@ -73,7 +74,15 @@ def json_from_clx_variant(
on the XML structure. (A <variant><no_name>...</no_name></variant> is the most
likely case where a scalar is returned.)
"""
node = parser(bxml.split(b"?>", 1)[-1]) # strip xml header
if bxml.startswith(b"<?xml"):
bxml = bxml.split(b"?>", 1)[-1] # strip xml header

try:
node = parser(bxml)
except SyntaxError: # when there are invalid characters in the XML
# could go straight to this ... not sure if it's slower
node = parser(bxml.decode(encoding="utf-8", errors="ignore"))

is_legacy = node.attrib.get("_VERSION") == "1.000000"
name, val = _node_name_value(node, strip_prefix, include_attrs=is_legacy)

Expand Down Expand Up @@ -123,7 +132,17 @@ def _node_name_value(
# NOTE: "no_name" is the standard name for a list-type node
# "BinaryItem" is a special case found in the BinaryMetadata_v1 tag...
# without special handling, you would only get the last item in the list
if cname in ("no_name", None, "", "BinaryItem", "TextInfoItem"):
# FIXME: handle the special cases below "" better.
if cname in (
"no_name",
None,
"",
"BinaryItem",
"TextInfoItem",
"Wavelength",
"MinSrc",
"MaxSrc",
):
if not cval:
# skip empty nodes ... the sdk does this too
continue
Expand Down
5 changes: 3 additions & 2 deletions src/nd2/_legacy/_legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@

import numpy as np

from .. import structures as strct
from .._util import AXIS, VoxelSize
from nd2 import structures as strct
from nd2._util import AXIS, VoxelSize

from ._legacy_xml import parse_xml_block

if TYPE_CHECKING:
Expand Down
30 changes: 16 additions & 14 deletions src/nd2/_pysdk/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
RawAttributesDict,
RawExperimentDict,
RawMetaDict,
RawTextInfoDict,
SpectLoopPars,
SpectrumDict,
TimeLoopPars,
Expand Down Expand Up @@ -237,7 +238,7 @@ def _load_single_experiment_loop(
count = loop_params.get("pPlanes", {}).get("uiCount", count)
return strct.SpectLoop(count=count)

raise NotImplementedError(
raise NotImplementedError( # pragma: no cover
f"We've never seen a file like this! (loop_type={loop_type!r}). We'd "
"appreciate it if you would submit this file at "
"https://github.com/tlambert03/nd2/issues/new",
Expand Down Expand Up @@ -347,28 +348,29 @@ def _get_spectrum_max(item: SpectrumDict | None) -> float:
return max(spectrum, key=lambda x: x[0])[1] if spectrum else 0.0


def load_text_info(src: dict) -> strct.TextInfo:
# we only want keys that are present in the src
def load_text_info(raw_txt_info: RawTextInfoDict) -> strct.TextInfo:
# we only want keys that are present in the raw_txt_info

out = {
key: src[lookup]
key: raw_txt_info.get(lookup)
for key, lookup in (
("appVersion", "TextInfoItem_14"),
("imageId", "TextInfoItem_0"),
("type", "TextInfoItem_1"),
("group", "TextInfoItem_2"),
("sampleId", "TextInfoItem_3"),
("author", "TextInfoItem_4"),
("description", "TextInfoItem_5"),
("capturing", "TextInfoItem_6"),
("conclusion", "TextInfoItem_10"),
("sampling", "TextInfoItem_7"),
("location", "TextInfoItem_8"),
("date", "TextInfoItem_9"),
("description", "TextInfoItem_5"),
("group", "TextInfoItem_2"),
("imageId", "TextInfoItem_0"),
("conclusion", "TextInfoItem_10"),
("info1", "TextInfoItem_11"),
("info2", "TextInfoItem_12"),
("location", "TextInfoItem_8"),
("optics", "TextInfoItem_13"),
("sampleId", "TextInfoItem_3"),
("sampling", "TextInfoItem_7"),
("type", "TextInfoItem_1"),
("appVersion", "TextInfoItem_14"),
)
if src.get(lookup)
if raw_txt_info.get(lookup)
}
return cast(strct.TextInfo, out)

Expand Down
Loading

0 comments on commit 9b91e1f

Please sign in to comment.