Skip to content

Commit 1c56a39

Browse files
committed
rename TileMetadata to OutlierDetector
1 parent 3ac7ec1 commit 1c56a39

File tree

4 files changed, with 29 additions (+29) and 29 deletions (-29).

README.md

+4 -4
Original file line number | Diff line number | Diff line change
@@ -96,15 +96,15 @@ let's try it out!
9696

9797

9898
```python
99-
from histoprep.utils import TileMetadata
99+
from histoprep.utils import OutlierDetector
100100

101101
# Let's wrap the tile metadata with a helper class.
102-
metadata = TileMetadata(tile_metadata)
102+
detector = OutlierDetector(tile_metadata)
103103
# Cluster tiles based on image metrics.
104-
clusters = metadata.cluster_kmeans(num_clusters=4, random_state=666)
104+
clusters = detector.cluster_kmeans(num_clusters=4, random_state=666)
105105
# Visualise first cluster.
106106
reader.get_annotated_thumbnail(
107-
image=reader.read_level(-1), coordinates=metadata.coordinates[clusters == 0]
107+
image=reader.read_level(-1), coordinates=detector.coordinates[clusters == 0]
108108
)
109109
```
110110
![Tiles in cluster 0](images/thumbnail_blue.jpeg)

histoprep/utils/_process.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
"""Helper class for preprocessing tile images."""
22

3-
__all__ = ["TileMetadata"]
3+
__all__ = ["OutlierDetector"]
44

55
from typing import Optional, Union
66

@@ -26,7 +26,7 @@
2626
XYWH_COLUMNS = ["x", "y", "w", "h"]
2727

2828

29-
class TileMetadata:
29+
class OutlierDetector:
3030
"""Class for exploring tile metadata and detecting outliers."""
3131

3232
def __init__(self, dataframe: pl.DataFrame) -> None:

scripts/usage.ipynb

+11 -11
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
"name": "stderr",
1010
"output_type": "stream",
1111
"text": [
12-
"slide_with_ink: 100%|██████████| 390/390 [00:01<00:00, 271.79it/s]\n"
12+
"slide_with_ink: 100%|██████████| 390/390 [00:01<00:00, 259.65it/s]\n"
1313
]
1414
}
1515
],
@@ -75,21 +75,21 @@
7575
}
7676
],
7777
"source": [
78-
"from histoprep.utils import TileMetadata\n",
78+
"from histoprep.utils import OutlierDetector\n",
7979
"\n",
8080
"# Let's wrap the tile metadata with a helper class.\n",
81-
"metadata = TileMetadata(tile_metadata)\n",
81+
"detector = OutlierDetector(tile_metadata)\n",
8282
"# Cluster tiles based on image metrics.\n",
83-
"clusters = metadata.cluster_kmeans(num_clusters=4, random_state=666)\n",
83+
"clusters = detector.cluster_kmeans(num_clusters=4, random_state=666)\n",
8484
"# Visualise first cluster.\n",
8585
"reader.get_annotated_thumbnail(\n",
86-
" image=reader.read_level(-1), coordinates=metadata.coordinates[clusters == 0]\n",
86+
" image=reader.read_level(-1), coordinates=detector.coordinates[clusters == 0]\n",
8787
")"
8888
]
8989
},
9090
{
9191
"cell_type": "code",
92-
"execution_count": 4,
92+
"execution_count": 6,
9393
"metadata": {},
9494
"outputs": [],
9595
"source": [
@@ -98,19 +98,19 @@
9898
"# Copy images.\n",
9999
"shutil.copy(\n",
100100
" \"./train_tiles/slide_with_ink/thumbnail.jpeg\",\n",
101-
" \"../docs/images/thumbnail.jpeg\",\n",
101+
" \"../images/thumbnail.jpeg\",\n",
102102
")\n",
103103
"shutil.copy(\n",
104104
" \"./train_tiles/slide_with_ink/thumbnail_tiles.jpeg\",\n",
105-
" \"../docs/images/thumbnail_tiles.jpeg\",\n",
105+
" \"../images/thumbnail_tiles.jpeg\",\n",
106106
")\n",
107107
"shutil.copy(\n",
108108
" \"./train_tiles/slide_with_ink/thumbnail_tissue.jpeg\",\n",
109-
" \"../docs/images/thumbnail_tissue.jpeg\",\n",
109+
" \"../images/thumbnail_tissue.jpeg\",\n",
110110
")\n",
111111
"reader.get_annotated_thumbnail(\n",
112-
" image=reader.read_level(-1), coordinates=metadata.coordinates[clusters == 0]\n",
113-
").save(\"../docs/images/thumbnail_blue.jpeg\")\n",
112+
" image=reader.read_level(-1), coordinates=detector.coordinates[clusters == 0]\n",
113+
").save(\"../images/thumbnail_blue.jpeg\")\n",
114114
"shutil.rmtree(\"./train_tiles\")"
115115
]
116116
}

tests/metadata_test.py

+12 -12
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import pytest
44

55
from histoprep import SlideReader
6-
from histoprep.utils import TileMetadata
6+
from histoprep.utils import OutlierDetector
77

88
from ._utils import SLIDE_PATH_JPEG, TMP_DIRECTORY, clean_temporary_directory
99

@@ -24,7 +24,7 @@ def generate_metadata(*, clean_tmp: bool = True, **kwargs) -> pl.DataFrame:
2424

2525

2626
def test_metadata_properties() -> None:
27-
metadata = TileMetadata(generate_metadata())
27+
metadata = OutlierDetector(generate_metadata())
2828
assert isinstance(metadata.dataframe, pl.DataFrame)
2929
assert isinstance(metadata.dataframe_without_metrics, pl.DataFrame)
3030
assert metadata.dataframe_without_metrics.columns == [
@@ -39,50 +39,50 @@ def test_metadata_properties() -> None:
3939
(0.8448732156862745, 0.7013530588235295, 0.7794474117647058),
4040
(0.13158384313725494, 0.1708792549019608, 0.13072776470588235),
4141
)
42-
assert str(metadata) == "TileMetadata(num_images=100, num_outliers=0)"
42+
assert str(metadata) == "OutlierDetector(num_images=100, num_outliers=0)"
4343

4444

4545
def test_metadata_index_columns() -> None:
46-
metadata = TileMetadata(generate_metadata())
46+
metadata = OutlierDetector(generate_metadata())
4747
assert isinstance(metadata["path"], np.ndarray)
4848
assert isinstance(metadata["red_mean"], np.ndarray)
4949

5050

5151
def test_metadata_from_parquet() -> None:
5252
generate_metadata(clean_tmp=False)
53-
metadata = TileMetadata.from_parquet(TMP_DIRECTORY / "slide" / "*.parquet")
53+
metadata = OutlierDetector.from_parquet(TMP_DIRECTORY / "slide" / "*.parquet")
5454
assert len(metadata.metric_columns) == 64
5555
clean_temporary_directory()
5656

5757

5858
def test_metadata_from_csv() -> None:
5959
generate_metadata(clean_tmp=False, use_csv=True)
60-
metadata = TileMetadata.from_csv(TMP_DIRECTORY / "slide" / "*.csv")
60+
metadata = OutlierDetector.from_csv(TMP_DIRECTORY / "slide" / "*.csv")
6161
assert len(metadata.metric_columns) == 64
6262
clean_temporary_directory()
6363

6464

6565
def test_metadata_plot_histogram() -> None:
66-
metadata = TileMetadata(generate_metadata(clean_tmp=False))
66+
metadata = OutlierDetector(generate_metadata(clean_tmp=False))
6767
metadata.plot_histogram("red_mean", num_images=0)
6868
metadata.plot_histogram("red_mean", num_images=12)
6969
clean_temporary_directory()
7070

7171

7272
def test_metadata_plot_histogram_fail() -> None:
73-
metadata = TileMetadata(generate_metadata())
73+
metadata = OutlierDetector(generate_metadata())
7474
with pytest.raises(ValueError, match="Difference between min=0.0 and max=0.0"):
7575
metadata.plot_histogram("black_pixels", num_images=0)
7676

7777

7878
def test_metadata_no_metrics_fail() -> None:
7979
dataframe = generate_metadata()
8080
with pytest.raises(ValueError, match="Metadata does not contain any metrics"):
81-
TileMetadata(dataframe["x", "y", "w", "h", "path"])
81+
OutlierDetector(dataframe["x", "y", "w", "h", "path"])
8282

8383

8484
def test_metadata_plot_collage() -> None:
85-
metadata = TileMetadata(generate_metadata(clean_tmp=False))
85+
metadata = OutlierDetector(generate_metadata(clean_tmp=False))
8686
assert metadata.random_image_collage(~metadata.outliers, num_rows=4).size == (
8787
1024,
8888
256,
@@ -97,7 +97,7 @@ def test_metadata_plot_collage() -> None:
9797

9898

9999
def test_metadata_add_outliers() -> None:
100-
metadata = TileMetadata(generate_metadata())
100+
metadata = OutlierDetector(generate_metadata())
101101
metadata.add_outliers(metadata["background"] > 0.5, desc="too high background")
102102
assert metadata.outliers.sum() == 27
103103
assert len(metadata.outlier_selections) == 1
@@ -106,6 +106,6 @@ def test_metadata_add_outliers() -> None:
106106

107107

108108
def test_metadata_cluster() -> None:
109-
metadata = TileMetadata(generate_metadata())
109+
metadata = OutlierDetector(generate_metadata())
110110
clusters = metadata.cluster_kmeans(10)
111111
assert len(clusters) == len(metadata)

0 commit comments

Comments
 (0)