Skip to content

Commit

Permalink
Add nuclear speckles data with tests (#84)
Browse files Browse the repository at this point in the history
* add nuclear speckles data for testing

* add citation

* add tests

* linting

* add docs to test util function

* remove extra line

Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>

---------

Co-authored-by: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com>
  • Loading branch information
d33bs and jenna-tomkinson authored Sep 4, 2024
1 parent 1c6fb28 commit 157bd8d
Show file tree
Hide file tree
Showing 13 changed files with 180 additions and 54 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,5 @@ cython_debug/

.DS_Store

# data used for testing but not yet ready to be checked in
tests/data/cytotable/Nuclear_speckles

# jupyter notebook build files from myst-nb
docs/jupyter_execute
16 changes: 16 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,19 @@ references:
identifiers:
- type: doi
value: 10.6084/m9.figshare.22233700.v4
- authors:
- name: "Way Lab and Alexander Lab Nuclear Speckles Collaboration"
date-accessed: "2024-09-04"
title: Way Lab and Alexander Lab Nuclear Speckles Collaboration Data
type: data
repository-code: https://github.com/WayScience/nuclear_speckle_image_profiling
notes: >-
Data from a collaborative project focusing on nuclear speckles
with the Way Lab and Alexander Lab is used to help validate
expected results. Parquet data is generated from CellProfiler
and CytoTable. Images courtesy of Katherine Alexander
and the Alexander Lab.
identifiers:
- description: "Github Link with Contributors"
type: url
value: "https://github.com/WayScience/nuclear_speckle_image_profiling/graphs/contributors"
2 changes: 1 addition & 1 deletion media/coverage-badge.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion src/cosmicqc/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,13 +631,13 @@ def draw_outline_on_image(actual_image_path: str, mask_image_path: str) -> Image
"""
# Load the TIFF image
tiff_image_array = skimage.io.imread(actual_image_path)
# Convert to PIL Image and then to 'RGBA'

# Check if the image is 16-bit and grayscale
if tiff_image_array.dtype == np.uint16:
# Normalize the image to 8-bit for display purposes
tiff_image_array = (tiff_image_array / 256).astype(np.uint8)

# Convert to PIL Image and then to 'RGBA'
tiff_image = Image.fromarray(tiff_image_array).convert("RGBA")

# Check if the image is too dark and adjust brightness if needed
Expand Down
1 change: 1 addition & 0 deletions src/cosmicqc/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def is_image_too_dark(image: Image, pixel_brightness_threshold: float = 10.0) ->
# Calculate the mean brightness
mean_brightness = np.mean(gray_image)

print(mean_brightness)
return mean_brightness < pixel_brightness_threshold


Expand Down
22 changes: 22 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd
import plotly.colors as pc
import pytest
import skimage
from PIL import Image


Expand All @@ -35,6 +36,14 @@ def fixture_cytotable_NF1_data_parquet_shrunken():
)


@pytest.fixture(name="cytotable_nuclear_speckles_data_parquet")
def fixture_cytotable_nuclear_speckle_data_parquet() -> str:
    """
    Return the path to a shrunken parquet file used to test
    CytoTable nuclear speckles data.

    Note: this fixture provides a file path string (not a loaded
    dataframe); the path is relative and therefore depends on the
    working directory the tests are run from.

    Returns:
        str:
            Relative path to the shrunken nuclear speckles parquet file.
    """
    return "tests/data/cytotable/nuclear_speckles/test_slide1_converted.parquet"


@pytest.fixture(name="basic_outlier_dataframe")
def fixture_basic_outlier_dataframe():
"""
Expand Down Expand Up @@ -150,3 +159,16 @@ def fixture_bright_image():
# Create a bright image (50x50 pixels, almost white)
bright_img_array = np.full((50, 50, 3), 255, dtype=np.uint8)
return Image.fromarray(bright_img_array)


@pytest.fixture
def fixture_nuclear_speckle_example_image():
    """
    Provide an example nuclear speckle image as an RGBA PIL image.

    The example TIFF is read from the test data directory, divided by 256,
    cast to unsigned 8-bit values and then converted to "RGBA" mode.
    """
    # read the raw pixel data from the example nuclear speckle TIFF
    raw_pixels = skimage.io.imread(
        "tests/data/cytotable/nuclear_speckles/images/plate1/slide1_A1_M10_CH0_Z09_illumcorrect.tiff"
    )

    # scale the values down by 256 and cast to uint8
    # (presumably the source TIFF is 16-bit - TODO confirm)
    eight_bit_pixels = (raw_pixels / 256).astype(np.uint8)

    # build the PIL image and convert it to RGBA
    return Image.fromarray(eight_bit_pixels).convert("RGBA")
Binary file not shown.
Binary file not shown.
31 changes: 31 additions & 0 deletions tests/data/cytotable/nuclear_speckles/shrink_source_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Module to shrink source data for testing.
Original source of data (processing):
https://github.com/WayScience/nuclear_speckle_image_profiling
"""

import os

import pandas as pd

# note: we assume the dataset has been manually added to the
# directory containing this module.
data_dir = os.path.dirname(__file__)
filename = f"{data_dir}/slide1_converted.parquet"

# only rows which reference this image (through any of the
# image filename columns below) are kept in the shrunken dataset
img_str = "slide1_A1_M10_CH0_Z09_illumcorrect"
image_filename_cols = [
    "Image_FileName_A647",
    "Image_FileName_DAPI",
    "Image_FileName_GOLD",
]

# read the full dataset from parquet
df = pd.read_parquet(filename)

# filter to only those data which include the image name in at least one
# of the filename columns. The name is matched literally (regex=False) and
# missing filenames count as "no match" (na=False) so the resulting mask
# is always strictly boolean.
keep_rows = (
    df[image_filename_cols]
    .apply(lambda col: col.str.contains(img_str, regex=False, na=False))
    .any(axis=1)
)
df = df[keep_rows]

# export to a new file
df.to_parquet(f"{data_dir}/test_slide1_converted.parquet")
Binary file not shown.
74 changes: 26 additions & 48 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,16 @@
Tests cosmicqc CytoDataFrame module
"""

import base64
import pathlib
import re
from io import BytesIO

import cosmicqc
import numpy as np
import pandas as pd
import plotly
from cosmicqc.frame import CytoDataFrame
from PIL import Image
from pyarrow import parquet

from tests.utils import cytodataframe_image_display_contains_green_pixels


def test_CytoDataFrame_with_dataframe(
tmp_path: pathlib.Path,
Expand Down Expand Up @@ -135,51 +132,32 @@ def test_show_report(cytotable_CFReT_data_df: pd.DataFrame):
assert report_path.is_file()


def test_repr_html(cytotable_NF1_data_parquet_shrunken: str):
def test_repr_html(
cytotable_NF1_data_parquet_shrunken: str,
cytotable_nuclear_speckles_data_parquet: str,
):
"""
Tests how images are rendered through customized repr_html in CytoDataFrame.
"""

# create cytodataframe with context and mask dirs
scdf = CytoDataFrame(
data=cytotable_NF1_data_parquet_shrunken,
data_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_images",
data_mask_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_masks",
)

# Collect HTML output from repr_html
html_output = scdf[
["Image_FileName_DAPI", "Image_FileName_GFP", "Image_FileName_RFP"]
]._repr_html_()

# Extract all base64 image data from the HTML
matches = re.findall(r'data:image/png;base64,([^"]+)', html_output)
assert len(matches) > 0, "No base64 image data found in HTML"

# Select the third base64 image data (indexing starts from 0)
# (we expect the first ones to not contain outlines based on the
# html and example data)
base64_data = matches[2]

# Decode the base64 image data
image_data = base64.b64decode(base64_data)
image = Image.open(BytesIO(image_data)).convert("RGB")

# Check for the presence of green pixels in the image
image_array = np.array(image)

# gather color channels from image
red_channel = image_array[:, :, 0]
green_channel = image_array[:, :, 1]
blue_channel = image_array[:, :, 2]

# Define a threshold to identify greenish pixels
green_threshold = 50
green_pixels = (
(green_channel > green_threshold)
& (green_channel > red_channel)
& (green_channel > blue_channel)
)

# Ensure there's at least one greenish pixel in the image
assert np.any(green_pixels), "The image does not contain green outlines."
assert cytodataframe_image_display_contains_green_pixels(
frame=CytoDataFrame(
data=cytotable_NF1_data_parquet_shrunken,
data_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_images",
data_mask_context_dir=f"{pathlib.Path(cytotable_NF1_data_parquet_shrunken).parent}/Plate_2_masks",
),
image_cols=["Image_FileName_DAPI", "Image_FileName_GFP", "Image_FileName_RFP"],
), "The NF1 images do not contain green outlines."
assert cytodataframe_image_display_contains_green_pixels(
frame=CytoDataFrame(
data=cytotable_nuclear_speckles_data_parquet,
data_context_dir=f"{pathlib.Path(cytotable_nuclear_speckles_data_parquet).parent}/images",
data_mask_context_dir=f"{pathlib.Path(cytotable_nuclear_speckles_data_parquet).parent}/masks",
),
image_cols=[
"Image_FileName_A647",
"Image_FileName_DAPI",
"Image_FileName_GOLD",
],
), "The nuclear speckles images do not contain green outlines."
10 changes: 10 additions & 0 deletions tests/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,13 @@ def test_adjust_image_brightness_with_mid_brightness_image(
adjusted_image = adjust_image_brightness(fixture_mid_brightness_image)
# The image should still not be too dark after adjustment
assert not is_image_too_dark(adjusted_image, pixel_brightness_threshold=10.0)


def test_adjust_nuclear_speckle_image_brightness(
    fixture_nuclear_speckle_example_image: Image,
):
    """
    Tests brightness adjustment using an example nuclear speckle image.
    """
    # the unadjusted example image should be flagged as too dark
    # when using the default brightness threshold
    assert is_image_too_dark(fixture_nuclear_speckle_example_image)

    # after adjustment the image should no longer be considered too dark
    # when checked against a threshold of 3.0
    brightened_image = adjust_image_brightness(fixture_nuclear_speckle_example_image)
    assert not is_image_too_dark(brightened_image, pixel_brightness_threshold=3.0)
73 changes: 72 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
Utilities for running pytest tests in coSMicQC
"""

import base64
import re
import subprocess
from typing import Tuple
from io import BytesIO
from typing import List, Tuple

import numpy as np
from cosmicqc import CytoDataFrame
from PIL import Image


def run_cli_command(command: str) -> Tuple[str, str, int]:
Expand All @@ -21,3 +28,67 @@ def run_cli_command(command: str) -> Tuple[str, str, int]:
command.split(" "), capture_output=True, text=True, check=False
)
return result.stdout, result.stderr, result.returncode


def cytodataframe_image_display_contains_green_pixels(
    frame: CytoDataFrame, image_cols: List[str], image_index: int = 2
) -> bool:
    """
    Determines if a relevant image from the CytoDataFrame HTML
    contains green pixels.

    Args:
        frame (CytoDataFrame):
            A custom `CytoDataFrame` object which includes image paths.
        image_cols (List[str]):
            A list of column names in the `CytoDataFrame`
            that contain images paths.
        image_index (int):
            Position (within the base64 images found in the HTML) of the
            image to inspect. Defaults to 2, the third image, because the
            earlier images are expected to not contain outlines based on
            the HTML and example data.

    Returns:
        bool:
            True if any greenish pixels are found in the selected
            image within the HTML, otherwise False.

    Raises:
        ValueError:
            If no base64-encoded image data is found in the
            HTML representation of the given columns, or if
            `image_index` does not refer to one of the images found.
    """

    # gather HTML output from CytoDataFrame
    html_output = frame[image_cols]._repr_html_()

    # extract all base64 image data from the HTML
    encoded_images = re.findall(r'data:image/png;base64,([^"]+)', html_output)

    # check that we have matches
    if not encoded_images:
        raise ValueError("No base64 image data found in HTML")

    # check that the requested image exists so callers receive a clear
    # error instead of an IndexError from the selection below
    if not -len(encoded_images) <= image_index < len(encoded_images):
        raise ValueError(
            f"Image index {image_index} is out of range: only "
            f"{len(encoded_images)} base64 image(s) found in HTML"
        )

    # decode the selected base64 image data into an RGB image
    image_data = base64.b64decode(encoded_images[image_index])
    image = Image.open(BytesIO(image_data)).convert("RGB")

    # gather color channels from the image
    image_array = np.array(image)
    red_channel = image_array[:, :, 0]
    green_channel = image_array[:, :, 1]
    blue_channel = image_array[:, :, 2]

    # a pixel is "greenish" when green exceeds the threshold
    # and also dominates both the red and blue channels
    green_threshold = 50
    green_pixels = (
        (green_channel > green_threshold)
        & (green_channel > red_channel)
        & (green_channel > blue_channel)
    )

    # convert numpy's bool type to a builtin bool to match the annotation
    return bool(np.any(green_pixels))

0 comments on commit 157bd8d

Please sign in to comment.