Skip to content

Commit 50c498e

Browse files
committed
functioning preprocessing
1 parent 649723b commit 50c498e

File tree

1 file changed

+35
-33
lines changed

1 file changed

+35
-33
lines changed

pathml/datasets/peso.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from pathml.datasets.utils import download_from_url
1414
from pathml.preprocessing.transforms import TissueDetectionHE
1515
from pathml.preprocessing.pipeline import Pipeline
16+
from pathml.core.slide_classes import HESlide
17+
from pathml.core.masks import Masks
1618

1719
class PesoDataModule(BaseDataModule):
1820
def __init__(self,
@@ -39,10 +41,8 @@ def __repr__(self):
3941
return f"repr=(DataModule for PESO segmentation dataset)"
4042

4143
def _download_peso(self, download_dir):
42-
# throw exception if download directory exists
4344
# TODO: check hash
4445
if not os.path.isdir(download_dir):
45-
x.
4646
print("Downloading Peso Dataset. Total file size is ~100GB, please wait.")
4747
files = ['peso_testset_mapping.csv','peso_testset_png.zip','peso_testset_png_padded.zip','peso_testset_regions.zip','peso_testset_wsi_1.zip','peso_testset_wsi_2.zip','peso_testset_wsi_3.zip','peso_testset_wsi_4.zip','peso_training_colordeconvolution.zip','peso_training_masks.zip','peso_training_masks_corrected.zip','peso_training_wsi_1.zip','peso_training_wsi_2.zip','peso_training_wsi_3.zip','peso_training_wsi_4.zip','peso_training_wsi_5.zip','peso_training_wsi_6.zip']
4848
url = f'https://zenodo.org/record/1485967/files/'
@@ -56,36 +56,39 @@ def _download_peso(self, download_dir):
5656
with zipfile.ZipFile(f"{root}/{file}",'r') as zip_ref:
5757
zip_ref.extractall(f"{root}/{Path(file).stem}")
5858
os.remove(f"{root}/{file}")
59-
trainingwsifolders = [
60-
'peso_training_wsi_1',
61-
'peso_training_wsi_2',
62-
'peso_training_wsi_3',
63-
'peso_training_wsi_4',
64-
'peso_training_wsi_5',
65-
'peso_training_wsi_6'
66-
]
67-
for trainingwsifolder in trainingwsifolders:
68-
for file in os.listdir(Path(download_dir)/Path(trainingwsifolder)):
69-
if file.endswith('.tif'):
70-
name = '_'.join(file.split('_')[:-1])
71-
maskpath = Path(name+'_HE_training_mask.tif')
72-
mask = HESlide(filepath = Path(download_dir)/Path('peso_training_masks')/maskpath, name = name)
73-
shape1, shape2 = mask.slide.get_image_shape()
74-
shape = (shape2, shape1)
75-
mask = mask.slide.slide.read_region(((0,0)), 0, shape)
76-
mask, _, _, _ = mask.split()
77-
# mask.point(lambda x: x * 255) # optionally convert to 255 for img
78-
masks = {'stroma': mask}
79-
wsi = HESlide(file , masks = masks)
80-
pipeline = Pipeline([
81-
TissueDetectionHE(mask_name = 'tissue', min_region_size = 500,
82-
threshold = 30, outer_contours_only = True)
83-
])
84-
# TODO: choose tile size
85-
slide.run(pipeline, tile_size=224)
86-
wsi.write(Path(download_dir)/Path('h5')/Path(name+'.h5'))
87-
os.remove(Path(download_dir)/Path(traininwsifolder)/Path(file))
88-
os.remove(Path(download_dir)/Path('peso_training_masks')/maskpath)
59+
trainingwsifolders = [
60+
'peso_training_wsi_1',
61+
'peso_training_wsi_2',
62+
'peso_training_wsi_3',
63+
'peso_training_wsi_4',
64+
'peso_training_wsi_5',
65+
'peso_training_wsi_6'
66+
]
67+
for trainingwsifolder in trainingwsifolders:
68+
for file in os.listdir(Path(download_dir)/Path(trainingwsifolder)):
69+
if file.endswith('.tif'):
70+
name = '_'.join(file.split('_')[:-1])
71+
maskpath = Path(name+'_HE_training_mask.tif')
72+
mask = HESlide(filepath = str(Path(download_dir)/Path('peso_training_masks')/maskpath), name = name)
73+
shape1, shape2 = mask.slide.get_image_shape()
74+
shape = (shape2, shape1)
75+
print(f"image shape is {shape}")
76+
mask = mask.slide.slide.read_region(((0,0)), 0, shape)
77+
mask, _, _, _ = mask.split()
78+
mask = np.array(mask)
79+
# mask.point(lambda x: x * 255) # optionally convert to dynamic range 255 for img
80+
masks = {'stroma': mask}
81+
masks = Masks(masks)
82+
wsi = HESlide(str(Path(download_dir)/Path(trainingwsifolder)/Path(file)) , masks = masks)
83+
pipeline = Pipeline([
84+
TissueDetectionHE(mask_name = 'tissue', min_region_size = 500,
85+
threshold = 30, outer_contours_only = True)
86+
])
87+
# TODO: choose tile size
88+
wsi.run(pipeline, tile_size=3000)
89+
wsi.write(str(Path(download_dir)/Path('h5')/Path(name+'.h5')))
90+
os.remove(str(Path(download_dir)/Path(traininwsifolder)/Path(file)))
91+
os.remove(str(Path(download_dir)/Path('peso_training_masks')/maskpath))
8992

9093
else:
9194
warn(f'download_dir exists, download canceled')
@@ -184,4 +187,3 @@ def __getitem__(self, ix):
184187
self.wsi = read(self.data_dir / Path('h5') / Path(wsiname + '.h5'))
185188
tile = self.wsi.tiles[index]
186189
return tile
187-

0 commit comments

Comments
 (0)