13
13
from pathml .datasets .utils import download_from_url
14
14
from pathml .preprocessing .transforms import TissueDetectionHE
15
15
from pathml .preprocessing .pipeline import Pipeline
16
+ from pathml .core .slide_classes import HESlide
17
+ from pathml .core .masks import Masks
16
18
17
19
class PesoDataModule (BaseDataModule ):
18
20
def __init__ (self ,
@@ -39,10 +41,8 @@ def __repr__(self):
39
41
return f"repr=(DataModule for PESO segmentation dataset)"
40
42
41
43
def _download_peso (self , download_dir ):
42
- # throw exception if download directory exists
43
44
# TODO: check hash
44
45
if not os .path .isdir (download_dir ):
45
- x .
46
46
print ("Downloading Peso Dataset. Total file size is ~100GB, please wait." )
47
47
files = ['peso_testset_mapping.csv' ,'peso_testset_png.zip' ,'peso_testset_png_padded.zip' ,'peso_testset_regions.zip' ,'peso_testset_wsi_1.zip' ,'peso_testset_wsi_2.zip' ,'peso_testset_wsi_3.zip' ,'peso_testset_wsi_4.zip' ,'peso_training_colordeconvolution.zip' ,'peso_training_masks.zip' ,'peso_training_masks_corrected.zip' ,'peso_training_wsi_1.zip' ,'peso_training_wsi_2.zip' ,'peso_training_wsi_3.zip' ,'peso_training_wsi_4.zip' ,'peso_training_wsi_5.zip' ,'peso_training_wsi_6.zip' ]
48
48
url = f'https://zenodo.org/record/1485967/files/'
@@ -56,36 +56,39 @@ def _download_peso(self, download_dir):
56
56
with zipfile .ZipFile (f"{ root } /{ file } " ,'r' ) as zip_ref :
57
57
zip_ref .extractall (f"{ root } /{ Path (file ).stem } " )
58
58
os .remove (f"{ root } /{ file } " )
59
- trainingwsifolders = [
60
- 'peso_training_wsi_1' ,
61
- 'peso_training_wsi_2' ,
62
- 'peso_training_wsi_3' ,
63
- 'peso_training_wsi_4' ,
64
- 'peso_training_wsi_5' ,
65
- 'peso_training_wsi_6'
66
- ]
67
- for trainingwsifolder in trainingwsifolders :
68
- for file in os .listdir (Path (download_dir )/ Path (trainingwsifolder )):
69
- if file .endswith ('.tif' ):
70
- name = '_' .join (file .split ('_' )[:- 1 ])
71
- maskpath = Path (name + '_HE_training_mask.tif' )
72
- mask = HESlide (filepath = Path (download_dir )/ Path ('peso_training_masks' )/ maskpath , name = name )
73
- shape1 , shape2 = mask .slide .get_image_shape ()
74
- shape = (shape2 , shape1 )
75
- mask = mask .slide .slide .read_region (((0 ,0 )), 0 , shape )
76
- mask , _ , _ , _ = mask .split ()
77
- # mask.point(lambda x: x * 255) # optionally convert to 255 for img
78
- masks = {'stroma' : mask }
79
- wsi = HESlide (file , masks = masks )
80
- pipeline = Pipeline ([
81
- TissueDetectionHE (mask_name = 'tissue' , min_region_size = 500 ,
82
- threshold = 30 , outer_contours_only = True )
83
- ])
84
- # TODO: choose tile size
85
- slide .run (pipeline , tile_size = 224 )
86
- wsi .write (Path (download_dir )/ Path ('h5' )/ Path (name + '.h5' ))
87
- os .remove (Path (download_dir )/ Path (traininwsifolder )/ Path (file ))
88
- os .remove (Path (download_dir )/ Path ('peso_training_masks' )/ maskpath )
59
+ trainingwsifolders = [
60
+ 'peso_training_wsi_1' ,
61
+ 'peso_training_wsi_2' ,
62
+ 'peso_training_wsi_3' ,
63
+ 'peso_training_wsi_4' ,
64
+ 'peso_training_wsi_5' ,
65
+ 'peso_training_wsi_6'
66
+ ]
67
+ for trainingwsifolder in trainingwsifolders :
68
+ for file in os .listdir (Path (download_dir )/ Path (trainingwsifolder )):
69
+ if file .endswith ('.tif' ):
70
+ name = '_' .join (file .split ('_' )[:- 1 ])
71
+ maskpath = Path (name + '_HE_training_mask.tif' )
72
+ mask = HESlide (filepath = str (Path (download_dir )/ Path ('peso_training_masks' )/ maskpath ), name = name )
73
+ shape1 , shape2 = mask .slide .get_image_shape ()
74
+ shape = (shape2 , shape1 )
75
+ print (f"image shape is { shape } " )
76
+ mask = mask .slide .slide .read_region (((0 ,0 )), 0 , shape )
77
+ mask , _ , _ , _ = mask .split ()
78
+ mask = np .array (mask )
79
+ # mask.point(lambda x: x * 255) # optionally convert to dynamic range 255 for img
80
+ masks = {'stroma' : mask }
81
+ masks = Masks (masks )
82
+ wsi = HESlide (str (Path (download_dir )/ Path (trainingwsifolder )/ Path (file )) , masks = masks )
83
+ pipeline = Pipeline ([
84
+ TissueDetectionHE (mask_name = 'tissue' , min_region_size = 500 ,
85
+ threshold = 30 , outer_contours_only = True )
86
+ ])
87
+ # TODO: choose tile size
88
+ wsi .run (pipeline , tile_size = 3000 )
89
+ wsi .write (str (Path (download_dir )/ Path ('h5' )/ Path (name + '.h5' )))
90
+ os .remove (str (Path (download_dir )/ Path (trainingwsifolder )/ Path (file )))
91
+ os .remove (str (Path (download_dir )/ Path ('peso_training_masks' )/ maskpath ))
89
92
90
93
else :
91
94
warn (f'download_dir exists, download canceled' )
@@ -184,4 +187,3 @@ def __getitem__(self, ix):
184
187
self .wsi = read (self .data_dir / Path ('h5' ) / Path (wsiname + '.h5' ))
185
188
tile = self .wsi .tiles [index ]
186
189
return tile
187
-
0 commit comments