Skip to content

Commit

Permalink
Add QM9 checksums, README.md and CITATIONS.bib.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 616084769
  • Loading branch information
machc authored and The TensorFlow Datasets Authors committed Mar 15, 2024
1 parent 89440d4 commit 2f6c156
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 25 deletions.
8 changes: 8 additions & 0 deletions tensorflow_datasets/datasets/qm9/CITATIONS.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@article{ramakrishnan2014quantum,
title={Quantum chemistry structures and properties of 134 kilo molecules},
author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and von Lilienfeld, O Anatole},
journal={Scientific Data},
volume={1},
year={2014},
publisher={Nature Publishing Group}
}
5 changes: 5 additions & 0 deletions tensorflow_datasets/datasets/qm9/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
QM9 consists of computed geometric, energetic, electronic, and thermodynamic
properties for 134k stable small organic molecules made up of C, H, O, N, and F
atoms. As is common practice, we remove the uncharacterized molecules and
provide the remaining 130,831 in the original order (not shuffled). We provide
a single 'train' split; users are expected to make their own validation/test
splits.
15 changes: 15 additions & 0 deletions tensorflow_datasets/datasets/qm9/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# coding=utf-8
# Copyright 2024 The TensorFlow Datasets Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

3 changes: 3 additions & 0 deletions tensorflow_datasets/datasets/qm9/checksums.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://figshare.com/ndownloader/files/3195395 964 af739d4a0fbe894a56f346ad6045ee1b74c58f8a8c9ef3fbfed920c9ad8b00f9 atomref.txt
https://springernature.figshare.com/ndownloader/files/3195389 86144227 3a63848ac80691bdb8d41834b575afad345b9300d7a2db0c38adb7f6eaa8360c dsgdb9nsd.xyz.tar.bz2
https://springernature.figshare.com/ndownloader/files/3195404 486752 3aa5115d540b356de94791d4a74c3bf1ed91c469ecf52a4f5d7cc0506fe02e24 uncharacterized.txt
31 changes: 7 additions & 24 deletions tensorflow_datasets/datasets/qm9/qm9_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,6 @@

pd = tfds.core.lazy_imports.pandas

_DESCRIPTION = """\
QM9 consists of computed geometric, energetic, electronic, and thermodynamic
properties for 134k stable small organic molecules made up of CHONF.
"""

_CITATION = """\
@article{ramakrishnan2014quantum,
title={Quantum chemistry structures and properties of 134 kilo molecules},
author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and von Lilienfeld, O Anatole},
journal={Scientific Data},
volume={1},
year={2014},
publisher={Nature Publishing Group}
}
"""

_HOMEPAGE = 'https://doi.org/10.6084/m9.figshare.c.978904.v5'

_ATOMREF_URL = 'https://figshare.com/ndownloader/files/3195395'
Expand Down Expand Up @@ -145,15 +129,14 @@ class Builder(tfds.core.GeneratorBasedBuilder):

def _info(self) -> tfds.core.DatasetInfo:
"""Returns the dataset metadata."""
return tfds.core.DatasetInfo(
builder=self,
description=_DESCRIPTION,
return self.dataset_info_from_configs(
disable_shuffling=True,
features=tfds.features.FeaturesDict({
'num_atoms': tfds.features.Tensor(shape=(), dtype=np.int64),
'charges': tfds.features.Tensor(shape=(29,), dtype=np.int64),
'Mulliken_charges': tfds.features.Tensor(shape=(29,),
dtype=np.float32),
'Mulliken_charges': tfds.features.Tensor(
shape=(29,), dtype=np.float32
),
'positions': tfds.features.Tensor(shape=(29, 3), dtype=np.float32),
'index': tfds.features.Tensor(shape=(), dtype=np.int64),
'A': tfds.features.Tensor(shape=(), dtype=np.float32),
Expand All @@ -180,13 +163,13 @@ def _info(self) -> tfds.core.DatasetInfo:
'SMILES_relaxed': tfds.features.Tensor(shape=(), dtype=np.str_),
'InChI': tfds.features.Tensor(shape=(), dtype=np.str_),
'InChI_relaxed': tfds.features.Tensor(shape=(), dtype=np.str_),
'frequencies': tfds.features.Tensor(shape=(None,),
dtype=np.float32),
'frequencies': tfds.features.Tensor(
shape=(None,), dtype=np.float32
),
}),
# These are returned if `as_supervised=True` in `builder.as_dataset`.
supervised_keys=None,
homepage=_HOMEPAGE,
citation=_CITATION,
)

def _split_generators(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ class Qm9Test(testing.DatasetBuilderTestCase):
qm9_dataset_builder._VALIDATION_SIZE = 1
qm9_dataset_builder._TEST_SIZE = 1

SKIP_CHECKSUMS = True
DATASET_CLASS = qm9_dataset_builder.Builder

DL_EXTRACT_RESULT = {
Expand Down

0 comments on commit 2f6c156

Please sign in to comment.