Add nuclear speckles data with tests (#84)

* add nuclear speckles data for testing * add citation * add tests * linting * add docs to test util function * remove extra line Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com> --------- Co-authored-by: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
WayScience · Sep 4, 2024 · 157bd8d · 157bd8d
1 parent 1c6fb28
commit 157bd8d
Show file tree

Hide file tree

Showing 13 changed files with 180 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -143,8 +143,5 @@ cython_debug/
 
 .DS_Store
 
-# data used for testing but not yet ready to be checked in
-tests/data/cytotable/Nuclear_speckles
-
 # jupyter notebook build files from myst-nb
 docs/jupyter_execute
diff --git a/CITATION.cff b/CITATION.cff
@@ -79,3 +79,19 @@ references:
     identifiers:
     - type: doi
       value: 10.6084/m9.figshare.22233700.v4
+  - authors:
+      - name: "Way Lab and Alexander Lab Nuclear Speckles Collaboration"
+    date-accessed: "2024-09-04"
+    title: Way Lab and Alexander Lab Nuclear Speckles Collaboration Data
+    type: data
+    repository-code: https://github.com/WayScience/nuclear_speckle_image_profiling
+    notes: >-
+      Data from a collaborative project focusing on nuclear speckles
+      with the Way Lab and Alexander Lab is used to help validate
+      expected results. Parquet data is generated from CellProfiler
+      and CytoTable. Images courtesy of Katherine Alexander
+      and the Alexander Lab.
+    identifiers:
+      - description: "Github Link with Contributors"
+        type: url
+        value: "https://github.com/WayScience/nuclear_speckle_image_profiling/graphs/contributors"
diff --git a/media/coverage-badge.svg b/media/coverage-badge.svg
diff --git a/src/cosmicqc/frame.py b/src/cosmicqc/frame.py
@@ -631,13 +631,13 @@ def draw_outline_on_image(actual_image_path: str, mask_image_path: str) -> Image
         """
         # Load the TIFF image
         tiff_image_array = skimage.io.imread(actual_image_path)
-        # Convert to PIL Image and then to 'RGBA'
 
         # Check if the image is 16-bit and grayscale
         if tiff_image_array.dtype == np.uint16:
             # Normalize the image to 8-bit for display purposes
             tiff_image_array = (tiff_image_array / 256).astype(np.uint8)
 
+        # Convert to PIL Image and then to 'RGBA'
         tiff_image = Image.fromarray(tiff_image_array).convert("RGBA")
 
         # Check if the image is too dark and adjust brightness if needed

diff --git a/src/cosmicqc/image.py b/src/cosmicqc/image.py
@@ -29,6 +29,7 @@ def is_image_too_dark(image: Image, pixel_brightness_threshold: float = 10.0) ->
     # Calculate the mean brightness
     mean_brightness = np.mean(gray_image)
 
+    print(mean_brightness)
     return mean_brightness < pixel_brightness_threshold
 
 

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -11,6 +11,7 @@
 import pandas as pd
 import plotly.colors as pc
 import pytest
+import skimage
 from PIL import Image
 
 
@@ -35,6 +36,14 @@ def fixture_cytotable_NF1_data_parquet_shrunken():
     )
 
 
+@pytest.fixture(name="cytotable_nuclear_speckles_data_parquet")
+def fixture_cytotable_nuclear_speckle_data_parquet():
+    """
+    Return the path to the shrunken CytoTable nuclear speckles
+    parquet file used for testing.
+    """
+    return "tests/data/cytotable/nuclear_speckles/test_slide1_converted.parquet"
+
+
 @pytest.fixture(name="basic_outlier_dataframe")
 def fixture_basic_outlier_dataframe():
     """
@@ -150,3 +159,16 @@ def fixture_bright_image():
     # Create a bright image (50x50 pixels, almost white)
     bright_img_array = np.full((50, 50, 3), 255, dtype=np.uint8)
     return Image.fromarray(bright_img_array)
+
+
+@pytest.fixture
+def fixture_nuclear_speckle_example_image():
+    # create an image array from example nuclear speckle data
+    return Image.fromarray(
+        (
+            skimage.io.imread(
+                "tests/data/cytotable/nuclear_speckles/images/plate1/slide1_A1_M10_CH0_Z09_illumcorrect.tiff"
+            )
+            / 256
+        ).astype(np.uint8)
+    ).convert("RGBA")
diff --git a/tests/data/cytotable/nuclear_speckles/images/plate1/slide1_A1_M10_CH0_Z09_illumcorrect.tiff b/tests/data/cytotable/nuclear_speckles/images/plate1/slide1_A1_M10_CH0_Z09_illumcorrect.tiff
diff --git a/...ytotable/nuclear_speckles/masks/plate1/slide1_A1_M10_CH0_Z09_illumcorrect_MaskNuclei.tiff b/...ytotable/nuclear_speckles/masks/plate1/slide1_A1_M10_CH0_Z09_illumcorrect_MaskNuclei.tiff
diff --git a/tests/data/cytotable/nuclear_speckles/shrink_source_data.py b/tests/data/cytotable/nuclear_speckles/shrink_source_data.py
@@ -0,0 +1,31 @@
+"""
+Module to shrink source data for testing.
+
+Original source of data (processing):
+https://github.com/WayScience/nuclear_speckle_image_profiling
+"""
+
+import os
+
+import pandas as pd
+
+# note: we assume the dataset has been manually added to the
+# directory containing this module.
+filename = f"{os.path.dirname(__file__)}/slide1_converted.parquet"
+
+# read the data from parquet, sample a fraction of the data
+df = pd.read_parquet(filename)
+
+# filter to only those data which include slide1_A1_M10_CH0_Z09_illumcorrect
+df = df[
+    (
+        df["Image_FileName_A647"].str.contains(
+            img_str := "slide1_A1_M10_CH0_Z09_illumcorrect"
+        )
+    )
+    | (df["Image_FileName_DAPI"].str.contains(img_str))
+    | (df["Image_FileName_GOLD"].str.contains(img_str))
+]
+
+# export to a new file
+df.to_parquet(f"{os.path.dirname(__file__)}/test_slide1_converted.parquet")
diff --git a/tests/data/cytotable/nuclear_speckles/test_slide1_converted.parquet b/tests/data/cytotable/nuclear_speckles/test_slide1_converted.parquet
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -2,19 +2,16 @@
 Tests cosmicqc CytoDataFrame module
 """
 
-import base64
 import pathlib
-import re
-from io import BytesIO
 
 import cosmicqc
-import numpy as np
 import pandas as pd
 import plotly
 from cosmicqc.frame import CytoDataFrame
-from PIL import Image
 from pyarrow import parquet
 
+from tests.utils import cytodataframe_image_display_contains_green_pixels
+
 
 def test_CytoDataFrame_with_dataframe(
     tmp_path: pathlib.Path,
@@ -135,51 +132,32 @@ def test_show_report(cytotable_CFReT_data_df: pd.DataFrame):
     assert report_path.is_file()
 
 
-def test_repr_html(cytotable_NF1_data_parquet_shrunken: str):
+def test_repr_html(
+    cytotable_NF1_data_parquet_shrunken: str,
+    cytotable_nuclear_speckles_data_parquet: str,
+):
     """
     Tests how images are rendered through customized repr_html in CytoDataFrame.
     """
 
-    # create cytodataframe with context and mask dirs
-    scdf = CytoDataFrame(
-        data=cytotable_NF1_data_parquet_shrunken,
-        data_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_images",
-        data_mask_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_masks",
-    )
-
-    # Collect HTML output from repr_html
-    html_output = scdf[
-        ["Image_FileName_DAPI", "Image_FileName_GFP", "Image_FileName_RFP"]
-    ]._repr_html_()
-
-    # Extract all base64 image data from the HTML
-    matches = re.findall(r'data:image/png;base64,([^"]+)', html_output)
-    assert len(matches) > 0, "No base64 image data found in HTML"
-
-    # Select the third base64 image data (indexing starts from 0)
-    # (we expect the first ones to not contain outlines based on the
-    # html and example data)
-    base64_data = matches[2]
-
-    # Decode the base64 image data
-    image_data = base64.b64decode(base64_data)
-    image = Image.open(BytesIO(image_data)).convert("RGB")
-
-    # Check for the presence of green pixels in the image
-    image_array = np.array(image)
-
-    # gather color channels from image
-    red_channel = image_array[:, :, 0]
-    green_channel = image_array[:, :, 1]
-    blue_channel = image_array[:, :, 2]
-
-    # Define a threshold to identify greenish pixels
-    green_threshold = 50
-    green_pixels = (
-        (green_channel > green_threshold)
-        & (green_channel > red_channel)
-        & (green_channel > blue_channel)
-    )
-
     # Ensure there's at least one greenish pixel in the image
-    assert np.any(green_pixels), "The image does not contain green outlines."
+    assert cytodataframe_image_display_contains_green_pixels(
+        frame=CytoDataFrame(
+            data=cytotable_NF1_data_parquet_shrunken,
+            data_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_images",
+            data_mask_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_masks",
+        ),
+        image_cols=["Image_FileName_DAPI", "Image_FileName_GFP", "Image_FileName_RFP"],
+    ), "The NF1 images do not contain green outlines."
+    assert cytodataframe_image_display_contains_green_pixels(
+        frame=CytoDataFrame(
+            data=cytotable_nuclear_speckles_data_parquet,
+            data_context_dir=f"{pathlib.Path(cytotable_nuclear_speckles_data_parquet).parent}/images",
+            data_mask_context_dir=f"{pathlib.Path(cytotable_nuclear_speckles_data_parquet).parent}/masks",
+        ),
+        image_cols=[
+            "Image_FileName_A647",
+            "Image_FileName_DAPI",
+            "Image_FileName_GOLD",
+        ],
+    ), "The nuclear speckles images do not contain green outlines."
diff --git a/tests/test_image.py b/tests/test_image.py
@@ -40,3 +40,13 @@ def test_adjust_image_brightness_with_mid_brightness_image(
     adjusted_image = adjust_image_brightness(fixture_mid_brightness_image)
     # The image should still not be too dark after adjustment
     assert not is_image_too_dark(adjusted_image, pixel_brightness_threshold=10.0)
+
+
+def test_adjust_nuclear_speckle_image_brightness(
+    fixture_nuclear_speckle_example_image: Image,
+):
+    assert is_image_too_dark(fixture_nuclear_speckle_example_image)
+    assert not is_image_too_dark(
+        adjust_image_brightness(fixture_nuclear_speckle_example_image),
+        pixel_brightness_threshold=3.0,
+    )
diff --git a/tests/utils.py b/tests/utils.py
@@ -2,8 +2,15 @@
 Utilities for running pytest tests in coSMicQC
 """
 
+import base64
+import re
 import subprocess
-from typing import Tuple
+from io import BytesIO
+from typing import List, Tuple
+
+import numpy as np
+from cosmicqc import CytoDataFrame
+from PIL import Image
 
 
 def run_cli_command(command: str) -> Tuple[str, str, int]:
@@ -21,3 +28,67 @@ def run_cli_command(command: str) -> Tuple[str, str, int]:
         command.split(" "), capture_output=True, text=True, check=False
     )
     return result.stdout, result.stderr, result.returncode
+
+
+def cytodataframe_image_display_contains_green_pixels(
+    frame: CytoDataFrame, image_cols: List[str]
+) -> bool:
+    """
+    Determines if relevant image from the CytoDataFrame HTML
+    contains green pixels.
+
+    Args:
+        frame (CytoDataFrame):
+            A custom `CytoDataFrame` object which includes image paths.
+        image_cols (List[str]):
+            A list of column names in the `CytoDataFrame`
+            that contain images paths.
+
+    Returns:
+        bool:
+            True if any greenish pixels are found in relevant
+            image within the HTML, otherwise False.
+
+    Raises:
+        ValueError:
+            If no base64-encoded image data is found in the
+            HTML representation of the given columns.
+    """
+
+    # gather HTML output from CytoDataFrame
+    html_output = frame[image_cols]._repr_html_()
+
+    # Extract all base64 image data from the HTML
+    matches = re.findall(r'data:image/png;base64,([^"]+)', html_output)
+
+    # check that we have matches
+    if not len(matches) > 0:
+        raise ValueError("No base64 image data found in HTML")
+
+    # Select the third base64 image data (indexing starts from 0)
+    # (we expect the first ones to not contain outlines based on the
+    # html and example data)
+    base64_data = matches[2]
+
+    # Decode the base64 image data
+    image_data = base64.b64decode(base64_data)
+    image = Image.open(BytesIO(image_data)).convert("RGB")
+
+    # Check for the presence of green pixels in the image
+    image_array = np.array(image)
+
+    # gather color channels from image
+    red_channel = image_array[:, :, 0]
+    green_channel = image_array[:, :, 1]
+    blue_channel = image_array[:, :, 2]
+
+    # Define a threshold to identify greenish pixels
+    green_threshold = 50
+    green_pixels = (
+        (green_channel > green_threshold)
+        & (green_channel > red_channel)
+        & (green_channel > blue_channel)
+    )
+
+    # return true/false if there's at least one greenish pixel in the image
+    return np.any(green_pixels)