fix: fix image width again (#92)
tlambert03 authored Sep 10, 2022
1 parent cdb9157 commit ef9161a
Showing 3 changed files with 65 additions and 76 deletions.
78 changes: 38 additions & 40 deletions src/nd2/_sdk/latest.pyx
@@ -1,6 +1,5 @@
import json
import mmap
import warnings
from pathlib import Path
from typing import List, Optional, Sequence, Tuple

@@ -113,17 +112,6 @@ cdef class ND2Reader:
cont = self._metadata().get('contents')
attrs = self._attributes()
nC = cont.get('channelCount') if cont else attrs.get("componentCount", 1)
# widthPx doesn't always equal widthBytes / bytesPerPixel ... but when it doesn't
# the image is slanted anyway. For now, we just force it here.
w = attrs.get('widthBytes') // (attrs.get("componentCount", 1) * attrs.get('bitsPerComponentInMemory') // 8)
if w != attrs['widthPx']:
wb = attrs.get('widthBytes')
bpp = (attrs.get('bitsPerComponentInMemory') // 8)
warnings.warn(
f"widthPx ({attrs['widthPx']}) != widthBytes ({wb}) / bytesPerPixel ({bpp}). "
f"Forcing widthPx to {w} (widthBytes / bytesPerPixel)."
)
attrs['widthPx'] = w
self.__attributes = structures.Attributes(**attrs, channelCount=nC)
return self.__attributes

@@ -261,17 +249,29 @@ cdef class ND2Reader:
attr = self.attributes
self.__raw_frame_shape = (
attr.heightPx,
attr.widthPx or -1,
attr.widthBytes // (self._bytes_per_pixel()) // attr.componentCount,
attr.channelCount or 1,
attr.componentCount // (attr.channelCount or 1),
)
return self.__raw_frame_shape

cdef _actual_frame_shape(self):
attr = self.attributes
return (
attr.heightPx,
attr.widthPx,
attr.channelCount or 1,
attr.componentCount // (attr.channelCount or 1),
)

def _bytes_per_pixel(self):
return self.attributes.bitsPerComponentInMemory // 8

cdef _dtype(self):
if self.__dtype is None:
a = self.attributes
d = a.pixelDataType[0] if a.pixelDataType else "u"
self.__dtype = np.dtype(f"{d}{a.bitsPerComponentInMemory // 8}")
self.__dtype = np.dtype(f"{d}{self._bytes_per_pixel()}")
return self.__dtype

def _read_image_with_sdk(self, LIMUINT seq_index):
@@ -298,48 +298,46 @@
if offset is None:
return self._missing_frame(index)

# try:
# return np.ndarray(
# shape=self._raw_frame_shape(),
# dtype=self._dtype(),
# buffer=self._mmap,
# offset=offset,
# strides=self._strides,
# )
# except TypeError:
# # If the chunkmap is wrong, and the mmap isn't long enough
# # for the requested offset & size, a TypeError is raised.
# return self._missing_frame(index)

try:
return np.frombuffer(
self._mmap,
return np.ndarray(
shape=self._actual_frame_shape(),
dtype=self._dtype(),
count=np.prod(self._raw_frame_shape()),
offset=offset
) # this will be reshaped in nd2file.py

except ValueError:
buffer=self._mmap,
offset=offset,
strides=self._strides,
)
# buf = np.frombuffer(
# self._mmap,
# dtype=self._dtype(),
# count=np.prod(self._raw_frame_shape()),
# offset=offset
# )
# return np.lib.stride_tricks.as_strided(
# buf,
# shape=self._actual_frame_shape(),
# strides=self._strides,
# )
except TypeError:
# If the chunkmap is wrong, and the mmap isn't long enough
# for the requested offset & size, a ValueError is raised.
# for the requested offset & size, a TypeError is raised.
return self._missing_frame(index)

@property
def _strides(self):
if not hasattr(self, '__strides'):
a = self.attributes
width = a.widthPx
widthP = a.widthPx
widthB = a.widthBytes
if not (width and widthB):
if not (widthP and widthB):
self.__strides = None
else:
bypc = a.bitsPerComponentInMemory // 8
array_stride = widthB - (bypc * width * a.componentCount)
bypc = self._bytes_per_pixel()
array_stride = widthB - (bypc * widthP * a.componentCount)
if array_stride == 0:
self.__strides = None
else:
self.__strides = (
array_stride + width * bypc * a.componentCount,
array_stride + widthP * bypc * a.componentCount,
a.componentCount * bypc,
a.componentCount // a.channelCount * bypc,
bypc,
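
A minimal sketch of the stride arithmetic used in _strides above, with made-up attribute values standing in for a row-padded frame (none of these numbers come from a real ND2 file). Each row occupies widthBytes in the buffer, which can exceed widthPx * componentCount * bytesPerComponent; setting the row stride to widthBytes lets np.ndarray skip the per-row padding and read an unskewed frame straight from the buffer.

    import numpy as np

    # Illustrative values only, not taken from any real file.
    heightPx, widthPx = 4, 5
    channelCount, componentCount = 1, 1
    bitsPerComponentInMemory = 16
    bypc = bitsPerComponentInMemory // 8               # bytes per component (mirrors _bytes_per_pixel)
    widthBytes = widthPx * componentCount * bypc + 2   # 2 padding bytes at the end of each row

    # Build a padded buffer: pixel data first, padding last, for every row.
    rows = [
        np.arange(r * widthPx, (r + 1) * widthPx, dtype="<u2").tobytes()
        + b"\x00" * (widthBytes - widthPx * componentCount * bypc)
        for r in range(heightPx)
    ]
    buf = b"".join(rows)

    # Same layout as ND2Reader._strides: the row stride is widthBytes, not widthPx * bypc.
    strides = (widthBytes, componentCount * bypc, componentCount // channelCount * bypc, bypc)
    frame = np.ndarray(
        shape=(heightPx, widthPx, channelCount, componentCount // channelCount),
        dtype="<u2",  # mirrors _dtype: "u" + str(bypc) for unsigned 16-bit data
        buffer=buf,
        offset=0,
        strides=strides,
    )
    assert frame[:, :, 0, 0].tolist() == [
        list(range(r * widthPx, (r + 1) * widthPx)) for r in range(heightPx)
    ]

    # If the buffer is too short for the requested offset and strided extent (e.g. a bad
    # chunkmap), np.ndarray raises TypeError, which _read_image turns into a missing frame.
    try:
        np.ndarray(shape=(heightPx, widthPx), dtype="<u2", buffer=buf[:widthBytes], strides=(widthBytes, bypc))
    except TypeError:
        pass
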
50 changes: 26 additions & 24 deletions tests/test_reader.py
@@ -2,7 +2,6 @@
import os
import pickle
import sys
from contextlib import nullcontext
from pathlib import Path

import dask.array as da
@@ -17,31 +16,15 @@
DATA = Path(__file__).parent / "data"


def _warning_ctx(fname: Path):
if fname.name in {
"jonas_control002.nd2",
"jonas_JJ1473_control_24h_JJ1473_control_24h_03.nd2",
}:
return pytest.warns(UserWarning, match="widthPx")
return nullcontext()


def test_metadata_extraction(new_nd2: Path):
assert ND2File.is_supported_file(new_nd2)
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
assert nd.path == str(new_nd2)
assert not nd.closed

# assert isinstance(nd._rdr._seq_count(), int)
assert isinstance(nd.attributes, structures.Attributes)

# this is one of the "skewed" files where widthPx seems
# to be set incorrectly in the actual metadata
if new_nd2.name == "jonas_control002.nd2":
assert nd.attributes.widthPx == 248
assert nd.shape == (65, 9, 152, 248)
assert nd.sizes["X"] == 248

# TODO: deal with typing when metadata is completely missing
assert isinstance(nd.metadata, structures.Metadata)
assert isinstance(nd.frame_metadata(0), structures.FrameMetadata)
@@ -58,7 +41,7 @@ def test_metadata_extraction(new_nd2: Path):


def test_read_safety(new_nd2: Path):
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
for i in range(nd._frame_count):
nd._rdr._read_image(i)

@@ -67,7 +50,7 @@ def test_position(new_nd2):
"""use position to extract a single stage position with asarray."""
if new_nd2.stat().st_size > 250_000_000:
pytest.skip("skipping read on big files")
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
dx = nd.to_xarray(delayed=True, position=0, squeeze=False)
nx = nd.to_xarray(delayed=False, position=0, squeeze=False)
assert dx.sizes[AXIS.POSITION] == 1
@@ -79,7 +62,7 @@ def test_position(new_nd2):


def test_dask(new_nd2):
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
dsk = nd.to_dask()
assert isinstance(dsk, da.Array)
assert dsk.shape == nd.shape
Expand All @@ -96,7 +79,7 @@ def test_dask_closed(single_nd2):

@pytest.mark.skipif(bool(os.getenv("CIBUILDWHEEL")), reason="slow")
def test_full_read(new_nd2):
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
if new_nd2.stat().st_size > 500_000_000:
pytest.skip("skipping full read on big files")
delayed_xarray: np.ndarray = np.asarray(nd.to_xarray(delayed=True))
@@ -126,7 +109,7 @@ def test_full_read_legacy(old_nd2):


def test_xarray(new_nd2):
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
xarr = nd.to_xarray()
assert isinstance(xarr, xr.DataArray)
assert isinstance(xarr.data, da.Array)
@@ -193,7 +176,7 @@ def test_bioformats_parity(new_nd2: Path, bfshapes: dict):
bf_info = {k: v for k, v in bfshapes[new_nd2.name]["shape"].items() if v > 1}
except KeyError:
pytest.skip(f"{new_nd2.name} not in stats")
with _warning_ctx(new_nd2), ND2File(new_nd2) as nd:
with ND2File(new_nd2) as nd:
# doing these weird checks/asserts for better error messages
if len(bf_info) != len(nd.sizes):
assert bf_info == nd.sizes
@@ -304,3 +287,22 @@ def test_with_without_sdk(small_nd2s: Path):
ValueError, match="compressed nd2 files with `read_using_sdk=False`"
):
imread(small_nd2s, read_using_sdk=False)


def test_extra_width_bytes():
expected = [
[203, 195, 193, 197],
[203, 195, 195, 197],
[205, 191, 192, 190],
[204, 201, 196, 206],
]

im = imread(str(DATA / "jonas_JJ1473_control_24h_JJ1473_control_24h_03.nd2"))
np.testing.assert_array_equal(im[0, 0, :4, :4], expected)

im = imread(
str(DATA / "jonas_JJ1473_control_24h_JJ1473_control_24h_03.nd2"),
read_using_sdk=True,
)
# NOTE: we actually WANT this to pass... but the SDK isn't doing the right thing
assert not np.array_equal(im[0, 0, :4, :4], expected)
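
For contrast, a self-contained sketch (synthetic buffer, made-up sizes, not the nd2 file used above) of the failure mode this test guards against: reading a row-padded buffer with a flat np.frombuffer plus reshape drags each row's padding into the next row, producing the "slanted" image that the old widthPx override tried to compensate for, while a strided read recovers the rows correctly.

    import numpy as np

    heightPx, widthPx, bypc = 3, 4, 2      # illustrative values only
    widthBytes = widthPx * bypc + 2        # 2 padding bytes per row

    # Row r holds the value r in every pixel, followed by padding.
    rows = [np.full(widthPx, r, dtype="<u2").tobytes() + b"\xff\xff" for r in range(heightPx)]
    buf = b"".join(rows)

    # Naive read: the padding is treated as pixel data, so rows drift out of alignment.
    flat = np.frombuffer(buf, dtype="<u2", count=heightPx * widthPx).reshape(heightPx, widthPx)
    assert not all((flat[r] == r).all() for r in range(heightPx))

    # Strided read: the row stride is widthBytes, so the padding is skipped and rows line up.
    good = np.ndarray(shape=(heightPx, widthPx), dtype="<u2", buffer=buf, strides=(widthBytes, bypc))
    assert all((good[r] == r).all() for r in range(heightPx))
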
13 changes: 1 addition & 12 deletions tests/test_sdk.py
@@ -1,4 +1,3 @@
from contextlib import nullcontext
from pathlib import Path

import numpy as np
@@ -7,18 +6,8 @@
from nd2._sdk import latest


# duplicated in test_reader
def _warning_ctx(fname: Path):
if fname.name in {
"jonas_control002.nd2",
"jonas_JJ1473_control_24h_JJ1473_control_24h_03.nd2",
}:
return pytest.warns(UserWarning, match="widthPx")
return nullcontext()


def test_new_sdk(new_nd2: Path):
with _warning_ctx(new_nd2), latest.ND2Reader(new_nd2, read_using_sdk=True) as nd:
with latest.ND2Reader(new_nd2, read_using_sdk=True) as nd:
a = nd._attributes()
assert isinstance(a, dict)
assert isinstance(nd._metadata(), dict)
