forked from tensorflow/datasets
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
https://patchcamelyon.grand-challenge.org/ PiperOrigin-RevId: 253198741
- Loading branch information
1 parent
27f1e7f
commit e9d6c9b
Showing
11 changed files
with
228 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# coding=utf-8 | ||
# Copyright 2019 The TensorFlow Datasets Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""PatchCamelyon images dataset.""" | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import h5py | ||
import tensorflow_datasets.public_api as tfds | ||
|
||
# Human-readable summary of the dataset; passed to `DatasetInfo` as the
# `description` field.
_DESCRIPTION = """\
The PatchCamelyon benchmark is a new and challenging image classification
dataset. It consists of 327,680 color images (96 x 96px) extracted from
histopathologic scans of lymph node sections. Each image is annotated with a
binary label indicating presence of metastatic tissue. PCam provides a new
benchmark for machine learning models: bigger than CIFAR10, smaller than
Imagenet, trainable on a single GPU.
"""

# BibTeX entry passed to `DatasetInfo` as the `citation` field.
_CITATION = """\
@misc{b_s_veeling_j_linmans_j_winkens_t_cohen_2018_2546921,
author = {B. S. Veeling, J. Linmans, J. Winkens, T. Cohen, M. Welling},
title = {Rotation Equivariant CNNs for Digital Pathology},
month = sep,
year = 2018,
doi = {10.1007/978-3-030-00934-2_24},
url = {https://doi.org/10.1007/978-3-030-00934-2_24}
}
"""

# Dataset homepage; passed to `DatasetInfo` in the `urls` list.
_URL = 'https://patchcamelyon.grand-challenge.org/'
|
||
|
||
class PatchCamelyon(tfds.core.GeneratorBasedBuilder):
  """Dataset builder for the PatchCamelyon (PCam) image dataset.

  Each example holds a string id, a 96x96x3 image and a two-class label.
  Images and labels come from separate HDF5 files, one pair per split.
  """

  VERSION = tfds.core.Version('0.1.0')

  def _info(self):
    """Returns the `DatasetInfo` with features, supervised keys and citation."""
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            'id':
                tfds.features.Text(),
            'image':
                tfds.features.Image(shape=(96, 96, 3), encoding_format='png'),
            'label':
                tfds.features.ClassLabel(num_classes=2),
        }),
        supervised_keys=('image', 'label'),
        urls=[_URL],
        citation=_CITATION)

  def _split_generators(self, dl_manager):
    """Downloads and extracts the HDF5 files and declares the splits.

    Args:
      dl_manager: download manager used to fetch and extract the resources.

    Returns:
      A list of `SplitGenerator`s for the test, train and validation splits.
    """
    base_url = 'https://zenodo.org/record/2546921/files/'
    # Keys follow the '<split>_<x|y>' pattern that `_generate_examples` relies
    # on to locate the image ('x') and label ('y') file of a split.
    resources = {
        'test_x': base_url + 'camelyonpatch_level_2_split_test_x.h5.gz',
        'test_y': base_url + 'camelyonpatch_level_2_split_test_y.h5.gz',
        'train_x': base_url + 'camelyonpatch_level_2_split_train_x.h5.gz',
        'train_y': base_url + 'camelyonpatch_level_2_split_train_y.h5.gz',
        'valid_x': base_url + 'camelyonpatch_level_2_split_valid_x.h5.gz',
        'valid_y': base_url + 'camelyonpatch_level_2_split_valid_y.h5.gz',
    }
    paths = dl_manager.download_and_extract(resources)
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TEST,
            num_shards=1,
            gen_kwargs=dict(split='test', paths=paths)),
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            num_shards=10,
            gen_kwargs=dict(split='train', paths=paths)),
        tfds.core.SplitGenerator(
            name=tfds.Split.VALIDATION,
            num_shards=1,
            gen_kwargs=dict(split='valid', paths=paths)),
    ]

  def _generate_examples(self, split, paths):
    """Generates images and labels given the paths to the h5 files.

    Args:
      split: name of the split to generate examples for (test, train, valid).
      paths: dictionary with the paths to the h5 files for each split.

    Yields:
      A dictionary with the example id, the image and the corresponding label.
    """
    # Context managers make sure both HDF5 handles are closed once the
    # generator is exhausted or closed, and that the image file does not leak
    # if opening the label file fails.
    with h5py.File(paths[split + '_x'], 'r') as h5x_file:
      with h5py.File(paths[split + '_y'], 'r') as h5y_file:
        images = h5x_file['x']
        labels = h5y_file['y']  # Note: Labels are in a N x 1 x 1 x 1 tensor.
        for i, (image, label) in enumerate(zip(images, labels)):
          label = label.flatten()[0]
          yield {'id': '%s_%d' % (split, i), 'image': image, 'label': label}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# coding=utf-8 | ||
# Copyright 2019 The TensorFlow Datasets Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Tests for camelyon_patch.py.""" | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
from tensorflow_datasets import testing | ||
from tensorflow_datasets.image import patch_camelyon | ||
|
||
|
||
class Caltech101Test(testing.DatasetBuilderTestCase):
  """Test case configuration for the `PatchCamelyon` dataset builder."""
  # NOTE(review): the class name looks copy-pasted from another dataset's
  # tests; consider renaming it to `PatchCamelyonTest`.

  DATASET_CLASS = patch_camelyon.PatchCamelyon

  # Number of examples per split; presumably the counts held by the fake
  # HDF5 files -- confirm against the fake data generator.
  SPLITS = dict(train=5, test=4, validation=3)

  # Fake file names keyed by '<split>_<x|y>' resource name.
  DL_EXTRACT_RESULT = {
      'train_x': 'camelyonpatch_level_2_split_train_x.h5',
      'train_y': 'camelyonpatch_level_2_split_train_y.h5',
      'test_x': 'camelyonpatch_level_2_split_test_x.h5',
      'test_y': 'camelyonpatch_level_2_split_test_y.h5',
      'valid_x': 'camelyonpatch_level_2_split_valid_x.h5',
      'valid_y': 'camelyonpatch_level_2_split_valid_y.h5',
  }
|
||
|
||
# Call `testing.test_main()` only when this file is executed as a script.
if __name__ == '__main__':
  testing.test_main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# coding=utf-8 | ||
# Copyright 2019 The TensorFlow Datasets Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
r"""Create fake data for Camelyon Patch dataset. | ||
""" | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import google_type_annotations | ||
from __future__ import print_function | ||
|
||
import os | ||
|
||
from absl import app | ||
from absl import flags | ||
|
||
import h5py | ||
import numpy as np | ||
from tensorflow_datasets.core.utils import py_utils | ||
|
||
|
||
# Shared absl flag registry; `FLAGS.tfds_dir` is read when building output
# paths.
FLAGS = flags.FLAGS
flags.DEFINE_string(
    name='tfds_dir',
    default=py_utils.tfds_dir(),
    help='Path to tensorflow_datasets directory')
|
||
|
||
def get_output_file_prefix(split):
  """Returns the path prefix of the fake data files for the given split.

  Args:
    split: split name that is substituted into the file name prefix.

  Returns:
    A path under `FLAGS.tfds_dir` ending in the split-specific file prefix,
    without the `_x.h5` / `_y.h5` suffix.
  """
  fake_data_dir = os.path.join(
      FLAGS.tfds_dir, 'testing', 'test_data', 'fake_examples',
      'patch_camelyon')
  file_prefix = 'camelyonpatch_level_2_split_%s' % split
  return os.path.join(fake_data_dir, file_prefix)
|
||
|
||
def write_to_h5_file(filepath, dataset_name, content):
  """Writes `content` into an HDF5 file as a single named dataset.

  Args:
    filepath: path of the HDF5 file, opened in 'w' mode.
    dataset_name: name of the dataset created inside the file.
    content: data stored in the dataset.
  """
  with h5py.File(filepath, 'w') as out_file:
    out_file.create_dataset(dataset_name, data=content)
|
||
|
||
def main(_):
  """Writes seeded random image and label HDF5 files for every split."""
  examples_per_split = [('train', 5), ('test', 4), ('valid', 3)]
  # Fixed seed so that repeated runs draw the same random values.
  np.random.seed(0x12345)
  for split, num_examples in examples_per_split:
    # Images are drawn before labels; keeping that order keeps the random
    # stream, and therefore the generated data, unchanged.
    images = np.random.randint(
        low=0, high=256, size=(num_examples, 96, 96, 3), dtype=np.uint8)
    labels = np.random.randint(
        low=0, high=2, size=(num_examples, 1, 1, 1), dtype=np.uint32)
    prefix = get_output_file_prefix(split)
    write_to_h5_file(prefix + '_x.h5', dataset_name='x', content=images)
    write_to_h5_file(prefix + '_y.h5', dataset_name='y', content=labels)
|
||
|
||
# Hand `main` to absl's `app.run` only when this file is executed as a script.
if __name__ == '__main__':
  app.run(main)
Binary file added
BIN
+110 KB
...sets/testing/test_data/fake_examples/patch_camelyon/camelyonpatch_level_2_split_test_x.h5
Binary file not shown.
Binary file added
BIN
+2.11 KB
...sets/testing/test_data/fake_examples/patch_camelyon/camelyonpatch_level_2_split_test_y.h5
Binary file not shown.
Binary file added
BIN
+137 KB
...ets/testing/test_data/fake_examples/patch_camelyon/camelyonpatch_level_2_split_train_x.h5
Binary file not shown.
Binary file added
BIN
+2.11 KB
...ets/testing/test_data/fake_examples/patch_camelyon/camelyonpatch_level_2_split_train_y.h5
Binary file not shown.
Binary file added
BIN
+83.1 KB
...ets/testing/test_data/fake_examples/patch_camelyon/camelyonpatch_level_2_split_valid_x.h5
Binary file not shown.
Binary file added
BIN
+2.11 KB
...ets/testing/test_data/fake_examples/patch_camelyon/camelyonpatch_level_2_split_valid_y.h5
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
https://zenodo.org/record/2546921/files/camelyonpatch_level_2_split_test_x.h5.gz 800875929 79174c2201ad521602a5888be8f36ee10875f37403dd3f2086caf2182ef87245 | ||
https://zenodo.org/record/2546921/files/camelyonpatch_level_2_split_test_y.h5.gz 3040 0a522005fccc8bbd04c5a117bfaf81d8da2676f03a29d7499f71d0a0bd6068ef | ||
https://zenodo.org/record/2546921/files/camelyonpatch_level_2_split_train_x.h5.gz 6421353462 d619e741468a7ab35c7e4a75e6821b7e7e6c9411705d45708f2a0efc8960656c | ||
https://zenodo.org/record/2546921/files/camelyonpatch_level_2_split_train_y.h5.gz 21378 b74126d2c01b20d3661f9b46765d29cf4e4fba6faba29c8e0d09d406331ab75a | ||
https://zenodo.org/record/2546921/files/camelyonpatch_level_2_split_valid_x.h5.gz 805965320 f82ee1670d027b4ec388048d9eabc2186b77c009655dae76d624c0ecb053ccb2 | ||
https://zenodo.org/record/2546921/files/camelyonpatch_level_2_split_valid_y.h5.gz 3038 ce1ae30f08feb468447971cfd0472e7becd0ad96d877c64120c72571439ae48c |