add rescue (#44)
tlambert03 authored Mar 13, 2022
1 parent 57edb34 commit 2f1f137
Showing 3 changed files with 185 additions and 10 deletions.
11 changes: 10 additions & 1 deletion src/nd2/__init__.py
@@ -4,9 +4,18 @@
__version__ = "unknown"
__author__ = "Talley Lambert"
__email__ = "talley.lambert@gmail.com"
__all__ = ["ND2File", "imread", "structures", "AXIS", "is_supported_file"]
__all__ = [
    "ND2File",
    "imread",
    "structures",
    "AXIS",
    "is_supported_file",
    "read_chunkmap",
    "rescue_nd2",
]


from . import structures
from ._chunkmap import read_chunkmap, rescue_nd2
from ._util import AXIS, is_supported_file
from .nd2file import ND2File, imread
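
The two new names added to `__all__` are importable straight from the top-level package. A minimal usage sketch (the file name, frame shape, and dtype are assumptions for illustration, not taken from this diff):

```python
import nd2

# Inspect the chunkmap of a (possibly damaged) file.  With fixup=True the
# image map is split into "safe", "fixed", and "bad" buckets.
images, metadata = nd2.read_chunkmap("some.nd2", fixup=True)

# Salvage raw frames from a corrupted file, one frame at a time.
for frame in nd2.rescue_nd2("some.nd2", frame_shape=(512, 512, 2), dtype="uint16"):
    print(frame.shape)
```
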
166 changes: 157 additions & 9 deletions src/nd2/_chunkmap.py
@@ -1,15 +1,19 @@
from __future__ import annotations

import io
import mmap
import struct
from contextlib import contextmanager
from typing import TYPE_CHECKING, overload

import numpy as np
from typing_extensions import TypedDict

if TYPE_CHECKING:
    from typing import BinaryIO, Dict, Iterator, Literal, Optional, Set, Tuple, Union

    from numpy.typing import DTypeLike

# h = short (2)
# i = int (4)
# I = unsigned int (4)
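
The format notes above describe the fixed 16-byte header that precedes every chunk. The actual `CHUNK_INFO` and `CHUNK_MAGIC` definitions fall outside the lines shown in this diff, so the constants below are only a guess that is consistent with the `CHUNK_INFO.unpack(fh.read(16))` calls later in the file:

```python
import struct

# Illustrative only: two unsigned ints plus an unsigned long long
# ("IIQ" = 4 + 4 + 8 bytes) gives a 16-byte header unpacked as
# (magic, name_length, data_length) elsewhere in this module.
CHUNK_INFO = struct.Struct("IIQ")
assert CHUNK_INFO.size == 16

with open("some.nd2", "rb") as fh:  # file name is illustrative
    magic, name_length, data_length = CHUNK_INFO.unpack(fh.read(16))
```
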
@@ -41,22 +45,61 @@ class FixedImageMap(TypedDict):

@overload
def read_chunkmap(
    file: Union[str, BinaryIO], fixup: Literal[True] = True, legacy: bool = False
    file: Union[str, BinaryIO],
    *,
    fixup: Literal[True] = True,
    legacy: bool = False,
    search_window: int = ...,
) -> Tuple[FixedImageMap, Dict[str, int]]:
    ...


@overload
def read_chunkmap(
    file: Union[str, BinaryIO], fixup: Literal[False], legacy: bool = False
    file: Union[str, BinaryIO],
    *,
    fixup: Literal[False],
    legacy: bool = False,
    search_window: int = ...,
) -> Tuple[Dict[int, int], Dict[str, int]]:
    ...


def read_chunkmap(file: Union[str, BinaryIO], fixup=True, legacy: bool = False):
def read_chunkmap(
    file: Union[str, BinaryIO],
    *,
    fixup=True,
    legacy: bool = False,
    search_window: int = 100,
):
"""Read chunkmap of nd2 `file`.
Parameters
----------
file : Union[str, BinaryIO]
Filename or file handle to nd2 file.
fixup : bool, optional
Whether to verify (and attempt to fix) frames whose positions have been
shifted relative to the predicted offset (i.e. in a corrupted file),
by default True.
legacy : bool, optional
Treat file as legacy nd2 format, by default False
search_window : int, optional
When fixup is true, this is the search window (in KB) that will be used
to try to find the actual chunk position. by default 100 KB
Returns
-------
tuple
(image chunk positions, metadata chunk positions). If `fixup` is true,
the image chunk dict will have three keys:
`bad`: estimated frame positions that could not be verified
`fixed`: estimated frame positions that were wrong, but corrected
`safe`: estimated frame positions that were found to be correct.
"""
    with ensure_handle(file) as fh:
        if not legacy:
            return read_new_chunkmap(fh)
            return read_new_chunkmap(fh, fixup=fixup, search_window=search_window)
        from ._legacy import legacy_nd2_chunkmap

        d = legacy_nd2_chunkmap(fh)
@@ -65,7 +108,9 @@ def read_chunkmap(file: Union[str, BinaryIO], fixup=True, legacy: bool = False):
        return f, d
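
A short usage sketch of the keyword-only API introduced here (the file name is an assumption). With `fixup=True`, the returned image map separates verified, repaired, and unrecoverable frame offsets, and `search_window` controls how far around each predicted offset the repair step looks:

```python
from nd2 import read_chunkmap

# Widen the search window to 500 KB for a file whose frames are badly shifted.
images, meta = read_chunkmap("corrupted.nd2", fixup=True, search_window=500)

print(f"{len(images['fixed'])} frame offsets were corrected")
print(f"{len(images['bad'])} frame offsets could not be recovered")
```
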


def read_new_chunkmap(fh: BinaryIO, fixup=True):
def read_new_chunkmap(
    fh: BinaryIO, fixup: bool = True, search_window: int = 100
) -> Tuple[Union[Dict[int, int], FixedImageMap], Dict[str, int]]:
    """read the map of the chunks at the end of the file
    chunk rules:
@@ -111,11 +156,13 @@ def read_new_chunkmap(fh: BinaryIO, fixup=True):
            meta_map[name[:-1].decode("ascii")] = position
        pos = p + 16
    if fixup:
        return _fix_frames(fh, image_map), meta_map
        return _fix_frames(fh, image_map, kbrange=search_window), meta_map
    return image_map, meta_map


def _fix_frames(fh: BinaryIO, images: Dict[int, int]) -> FixedImageMap:
def _fix_frames(
    fh: BinaryIO, images: Dict[int, int], kbrange: int = 100
) -> FixedImageMap:
    """Look for corrupt frames, and try to find their actual positions."""
    bad: Set[int] = set()
    fixed: Set[int] = set()
@@ -126,7 +173,9 @@ def _fix_frames(fh: BinaryIO, images: Dict[int, int]) -> FixedImageMap:
        magic, shift, length = CHUNK_INFO.unpack(fh.read(16))
        _lengths.add(length)
        if magic != CHUNK_MAGIC:  # corrupt frame
            correct_pos = _search(fh, b"ImageDataSeq|%a!" % fnum, images[fnum])
            correct_pos = _search(
                fh, b"ImageDataSeq|%a!" % fnum, images[fnum], kbrange=kbrange
            )
            if correct_pos is not None:
                fixed.add(fnum)
                safe[fnum] = correct_pos + 24 + int(shift)
@@ -138,7 +187,7 @@ def _fix_frames(fh: BinaryIO, images: Dict[int, int]) -> FixedImageMap:
    return {"bad": bad, "fixed": fixed, "safe": safe}


def _search(fh: BinaryIO, string: bytes, guess: int, kbrange=100):
def _search(fh: BinaryIO, string: bytes, guess: int, kbrange: int = 100):
    """Search for `string`, in the `kbrange` bytes around position `guess`."""
    fh.seek(max(guess - ((1000 * kbrange) // 2), 0))
    try:
@@ -175,3 +224,102 @@ def iter_chunks(handle) -> Iterator[Tuple[str, int, int]]:
        if pos >= file_size:
            break
        handle.seek(pos)
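
The `_search` helper above captures the repair strategy: read a window of roughly `kbrange` kilobytes centred on the predicted offset and look for the frame's name tag inside it. A standalone sketch of the same idea (the helper name is illustrative and not part of the library; note that the code treats a "KB" as 1000 bytes):

```python
from typing import BinaryIO, Optional


def find_near(fh: BinaryIO, needle: bytes, guess: int, kbrange: int = 100) -> Optional[int]:
    """Return the absolute offset of `needle` within ~kbrange KB of `guess`, else None."""
    start = max(guess - (1000 * kbrange) // 2, 0)
    fh.seek(start)
    window = fh.read(1000 * kbrange)
    hit = window.find(needle)
    return start + hit if hit >= 0 else None
```
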


def rescue_nd2(
    handle: Union[BinaryIO, str],
    frame_shape: Tuple[int, ...] = (),
    dtype: DTypeLike = "uint16",
    max_iters: Optional[int] = None,
    verbose=True,
    chunk_start=CHUNK_MAGIC.to_bytes(4, "little"),
):
"""Iterator that yields all discovered frames in a file handle
In nd2 files, each "frame" contains XY and all channel info (both true
channels as well as RGB components). Frames are laid out as (Y, X, C),
and the `frame_shape` should match the expected frame size. If
`frame_shape` is not provided, a guess will be made about the vector shape
of each frame, but it may be incorrect.
Parameters
----------
handle : Union[BinaryIO,str]
Filepath string, or binary file handle (For example
`handle = open('some.nd2', 'rb')`)
frame_shape : Tuple[int, ...], optional
expected shape of each frame, by default a 1 dimensional array will
be yielded for each frame, which can be reshaped later if desired.
NOTE: nd2 frames are generally ordered as
(height, width, true_channels, rgbcomponents).
So unlike numpy, which would use (channels, Y, X), you should use
(Y, X, channels)
dtype : np.dtype, optional
Data type, by default np.uint16
max_iters : Optional[int], optional
A maximum number of frames to yield, by default will yield until the
end of the file is reached
Yields
------
np.ndarray
each discovered frame in the file
Examples
--------
>>> with open('some_bad.nd2', 'rb') as fh:
>>> frames = rescue_nd2(fh, (512, 512, 4), 'uint16')
>>> ary = np.stack(frames)
You will likely want to reshape `ary` after that.
"""
    dtype = np.dtype(dtype)
    with ensure_handle(handle) as _fh:
        mm = mmap.mmap(_fh.fileno(), 0, access=mmap.ACCESS_READ)

        offset = 0
        iters = 0
        while True:
            # search for the next part of the file starting with CHUNK_START
            offset = mm.find(chunk_start, offset)
            if offset < 0:
                if verbose:
                    print("End of file.")
                return

            # location at the end of the chunk header
            end_hdr = offset + CHUNK_INFO.size

            # find the next "!"
            # In nd2 files, each data chunk starts with the
            # string "ImageDataSeq|N" ... where N is the frame index
            next_bang = mm.find(b"!", end_hdr)
            if next_bang > 0 and (0 < next_bang - end_hdr < 128):
                # if we find the "!"... make sure we have an ImageDataSeq
                chunk_name = mm[end_hdr:next_bang]
                if chunk_name.startswith(b"ImageDataSeq|"):
                    if verbose:
                        print(f"Found image {iters} at offset {offset}")
                    # Now, read the actual data
                    _, shift, length = CHUNK_INFO.unpack(mm[offset:end_hdr])
                    # convert to numpy array and yield
                    # (can't remember why the extra 8 bytes)
                    try:
                        shape = frame_shape or ((length - 8) // dtype.itemsize,)
                        yield np.ndarray(
                            shape=shape,
                            dtype=dtype,
                            buffer=mm,
                            offset=end_hdr + shift + 8,
                        )
                    except TypeError as e:
                        # buffer is likely too small
                        if verbose:
                            print(f"Error at offset {offset}: {e}")
                    iters += 1
            elif verbose:
                print(f"Found chunk at offset {offset} with no image data")

            offset += 1
            if max_iters and iters >= max_iters:
                return
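
Because each rescued frame arrives as a (Y, X, C) array, downstream code usually wants to move the channel axis before stacking, as the docstring's reshape note suggests. A hedged post-processing sketch (file name, frame shape, and dtype are assumptions):

```python
import numpy as np

from nd2 import rescue_nd2

frames = [
    np.moveaxis(f, -1, 0)  # (Y, X, C) -> (C, Y, X)
    for f in rescue_nd2("some_bad.nd2", frame_shape=(512, 512, 2), dtype="uint16")
]
if frames:
    ary = np.stack(frames)  # (n_frames, C, Y, X)
```
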
18 changes: 18 additions & 0 deletions tests/test_rescue.py
@@ -0,0 +1,18 @@
import nd2
import numpy as np


def test_rescue(single_nd2):
    # TODO: we could potentially put more of this logic into convenience functions
    # we can't do too much magic about guessing shape and dtype since some files
    # may not have that information intact
    with nd2.ND2File(single_nd2) as rdr:
        real_read = rdr.asarray()
        raw_frames = [
            f.transpose((2, 0, 1, 3)).squeeze()
            for f in nd2.rescue_nd2(
                single_nd2, frame_shape=rdr._raw_frame_shape, dtype=rdr.dtype
            )
        ]
        raw_read: np.ndarray = np.stack(raw_frames).reshape(rdr.shape)
        np.testing.assert_array_equal(real_read, raw_read)
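
The test can lean on `ND2File` to supply `_raw_frame_shape` and `dtype`; for a file too damaged to open at all, those values must be provided by hand. A hedged sketch of that fallback (all shapes and the file name are assumptions about the acquisition, not values read from the file):

```python
import numpy as np

import nd2

# Assumed acquisition: 512 x 512 pixels, 2 true channels, 1 RGB component, 16-bit.
frame_shape = (512, 512, 2, 1)  # (height, width, true_channels, components)
frames = [
    f.transpose((2, 0, 1, 3)).squeeze()  # -> (channels, Y, X)
    for f in nd2.rescue_nd2("unreadable.nd2", frame_shape=frame_shape, dtype="uint16")
]
if frames:
    recovered = np.stack(frames)  # (n_frames, channels, Y, X)
```
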
