3
3
import pytest
4
4
5
5
from histoprep import SlideReader
6
- from histoprep .utils import TileMetadata
6
+ from histoprep .utils import OutlierDetector
7
7
8
8
from ._utils import SLIDE_PATH_JPEG , TMP_DIRECTORY , clean_temporary_directory
9
9
@@ -24,7 +24,7 @@ def generate_metadata(*, clean_tmp: bool = True, **kwargs) -> pl.DataFrame:
24
24
25
25
26
26
def test_metadata_properties () -> None :
27
- metadata = TileMetadata (generate_metadata ())
27
+ metadata = OutlierDetector (generate_metadata ())
28
28
assert isinstance (metadata .dataframe , pl .DataFrame )
29
29
assert isinstance (metadata .dataframe_without_metrics , pl .DataFrame )
30
30
assert metadata .dataframe_without_metrics .columns == [
@@ -39,50 +39,50 @@ def test_metadata_properties() -> None:
39
39
(0.8448732156862745 , 0.7013530588235295 , 0.7794474117647058 ),
40
40
(0.13158384313725494 , 0.1708792549019608 , 0.13072776470588235 ),
41
41
)
42
- assert str (metadata ) == "TileMetadata (num_images=100, num_outliers=0)"
42
+ assert str (metadata ) == "OutlierDetector (num_images=100, num_outliers=0)"
43
43
44
44
45
45
def test_metadata_index_columns () -> None :
46
- metadata = TileMetadata (generate_metadata ())
46
+ metadata = OutlierDetector (generate_metadata ())
47
47
assert isinstance (metadata ["path" ], np .ndarray )
48
48
assert isinstance (metadata ["red_mean" ], np .ndarray )
49
49
50
50
51
51
def test_metadata_from_parquet () -> None :
52
52
generate_metadata (clean_tmp = False )
53
- metadata = TileMetadata .from_parquet (TMP_DIRECTORY / "slide" / "*.parquet" )
53
+ metadata = OutlierDetector .from_parquet (TMP_DIRECTORY / "slide" / "*.parquet" )
54
54
assert len (metadata .metric_columns ) == 64
55
55
clean_temporary_directory ()
56
56
57
57
58
58
def test_metadata_from_csv () -> None :
59
59
generate_metadata (clean_tmp = False , use_csv = True )
60
- metadata = TileMetadata .from_csv (TMP_DIRECTORY / "slide" / "*.csv" )
60
+ metadata = OutlierDetector .from_csv (TMP_DIRECTORY / "slide" / "*.csv" )
61
61
assert len (metadata .metric_columns ) == 64
62
62
clean_temporary_directory ()
63
63
64
64
65
65
def test_metadata_plot_histogram () -> None :
66
- metadata = TileMetadata (generate_metadata (clean_tmp = False ))
66
+ metadata = OutlierDetector (generate_metadata (clean_tmp = False ))
67
67
metadata .plot_histogram ("red_mean" , num_images = 0 )
68
68
metadata .plot_histogram ("red_mean" , num_images = 12 )
69
69
clean_temporary_directory ()
70
70
71
71
72
72
def test_metadata_plot_histogram_fail () -> None :
73
- metadata = TileMetadata (generate_metadata ())
73
+ metadata = OutlierDetector (generate_metadata ())
74
74
with pytest .raises (ValueError , match = "Difference between min=0.0 and max=0.0" ):
75
75
metadata .plot_histogram ("black_pixels" , num_images = 0 )
76
76
77
77
78
78
def test_metadata_no_metrics_fail () -> None :
79
79
dataframe = generate_metadata ()
80
80
with pytest .raises (ValueError , match = "Metadata does not contain any metrics" ):
81
- TileMetadata (dataframe ["x" , "y" , "w" , "h" , "path" ])
81
+ OutlierDetector (dataframe ["x" , "y" , "w" , "h" , "path" ])
82
82
83
83
84
84
def test_metadata_plot_collage () -> None :
85
- metadata = TileMetadata (generate_metadata (clean_tmp = False ))
85
+ metadata = OutlierDetector (generate_metadata (clean_tmp = False ))
86
86
assert metadata .random_image_collage (~ metadata .outliers , num_rows = 4 ).size == (
87
87
1024 ,
88
88
256 ,
@@ -97,7 +97,7 @@ def test_metadata_plot_collage() -> None:
97
97
98
98
99
99
def test_metadata_add_outliers () -> None :
100
- metadata = TileMetadata (generate_metadata ())
100
+ metadata = OutlierDetector (generate_metadata ())
101
101
metadata .add_outliers (metadata ["background" ] > 0.5 , desc = "too high background" )
102
102
assert metadata .outliers .sum () == 27
103
103
assert len (metadata .outlier_selections ) == 1
@@ -106,6 +106,6 @@ def test_metadata_add_outliers() -> None:
106
106
107
107
108
108
def test_metadata_cluster () -> None :
109
- metadata = TileMetadata (generate_metadata ())
109
+ metadata = OutlierDetector (generate_metadata ())
110
110
clusters = metadata .cluster_kmeans (10 )
111
111
assert len (clusters ) == len (metadata )
0 commit comments