diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml new file mode 100644 index 0000000..58202db --- /dev/null +++ b/.github/workflows/publish_to_pypi.yml @@ -0,0 +1,30 @@ +name: Upload new ECNet version to PyPI + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 0000000..37c4c69 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,26 @@ +name: Run ECNet tests + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + pip install pytest pytest-md + - name: Install package + run: python -m pip install . + - name: Run tests + uses: pavelzw/pytest-action@v2 + with: + emoji: false + report-title: 'ECNet test report' \ No newline at end of file diff --git a/README.md b/README.md index faae5b1..dad733c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,6 @@ [![status](http://joss.theoj.org/papers/f556afbc97e18e1c1294d98e0f7ff99f/status.svg)](http://joss.theoj.org/papers/f556afbc97e18e1c1294d98e0f7ff99f) [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/ECRL/ECNet/master/LICENSE.txt) [![Documentation Status](https://readthedocs.org/projects/ecnet/badge/?version=latest)](https://ecnet.readthedocs.io/en/latest/?badge=latest) -[![Build Status](https://dev.azure.com/uml-ecrl/package-management/_apis/build/status/ECRL.ECNet?branchName=master)](https://dev.azure.com/uml-ecrl/package-management/_build/latest?definitionId=1&branchName=master) **ECNet** is an open source Python package for creating machine learning models to predict fuel properties. ECNet comes bundled with a variety of fuel property datasets, including cetane number, yield sooting index, and research/motor octane number. ECNet was built using the [PyTorch](https://pytorch.org/) library, allowing easy implementation of our models in your existing ML pipelines. diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index f8d0ff7..0000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,26 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: -- master - -pool: - vmImage: 'ubuntu-latest' - -steps: -- task: UsePythonVersion@0 - inputs: - versionSpec: '3.11' - architecture: 'x64' - -- script: | - python -m pip install --upgrade pip setuptools wheel - python setup.py install - displayName: 'Install dependencies' - -- script: | - cd tests - python test_all.py - displayName: 'Unit testing' diff --git a/docs/api_blends.md b/docs/api_blends.md index 9e37387..90b2bc2 100644 --- a/docs/api_blends.md +++ b/docs/api_blends.md @@ -1,16 +1,26 @@ # ecnet.blends +## ecnet.blends.cetane_number + ::: ecnet.blends.cetane_number handler: python +## ecnet.blends.yield_sooting_index + ::: ecnet.blends.yield_sooting_index handler: python +## ecnet.blends.kinematic_viscosity + ::: ecnet.blends.kinematic_viscosity handler: python +## ecnet.blends.cloud_point + ::: ecnet.blends.cloud_point handler: python +## ecnet.blends.lower_heating_value + ::: ecnet.blends.lower_heating_value handler: python \ No newline at end of file diff --git a/docs/api_callbacks.md b/docs/api_callbacks.md index 1f77f4f..a497674 100644 --- a/docs/api_callbacks.md +++ b/docs/api_callbacks.md @@ -1,13 +1,21 @@ # ecnet.callbacks +## ecnet.callbacks.CallbackOperator + ::: ecnet.callbacks.CallbackOperator handler: python +## ecnet.callbacks.Callback + ::: ecnet.callbacks.Callback handler: python +## ecnet.callbacks.LRDecayLinear + ::: ecnet.callbacks.LRDecayLinear handler: python +## ecnet.callbacks.Validator + ::: ecnet.callbacks.Validator handler: python \ No newline at end of file diff --git a/docs/api_datasets.md b/docs/api_datasets.md index bbb7bb6..c316633 100644 --- a/docs/api_datasets.md +++ b/docs/api_datasets.md @@ -1,40 +1,66 @@ # ecnet.datasets +## ecnet.datasets.QSPRDataset + ::: ecnet.datasets.QSPRDataset handler: python +## ecnet.datasets.QSPRDatasetFromFile + ::: ecnet.datasets.QSPRDatasetFromFile handler: python +## ecnet.datasets.QSPRDatasetFromValues + ::: ecnet.datasets.QSPRDatasetFromValues handler: python +## ecnet.datasets.load_bp + ::: ecnet.datasets.load_bp handler: python +## ecnet.datasets.load_cn + ::: ecnet.datasets.load_cn handler: python +## ecnet.datasets.load_cp + ::: ecnet.datasets.load_cp handler: python +## ecnet.datasets.load_kv + ::: ecnet.datasets.load_kv handler: python +## ecnet.datasets.load_lhv + ::: ecnet.datasets.load_lhv handler: python +## ecnet.datasets.load_mon + ::: ecnet.datasets.load_mon handler: python +## ecnet.datasets.load_pp + ::: ecnet.datasets.load_pp handler: python +## ecnet.datasets.load_ron + ::: ecnet.datasets.load_ron handler: python +## ecnet.datasets.load_ysi + ::: ecnet.datasets.load_ysi handler: python +## ecnet.datasets.load_mp + ::: ecnet.datasets.load_mp handler: python \ No newline at end of file diff --git a/docs/api_tasks.md b/docs/api_tasks.md index de2a0e5..f421408 100644 --- a/docs/api_tasks.md +++ b/docs/api_tasks.md @@ -1,13 +1,21 @@ # ecnet.tasks +## ecnet.tasks.select_rfr + ::: ecnet.tasks.select_rfr handler: python +## ecnet.tasks.tune_batch_size + ::: ecnet.tasks.tune_batch_size handler: python +## ecnet.tasks.tune_model_architecture + ::: ecnet.tasks.tune_model_architecture handler: python +## ecnet.tasks.tune_training_parameters + ::: ecnet.tasks.tune_training_parameters handler: python \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 12dc9cc..09a0de0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,7 +7,6 @@ [![status](http://joss.theoj.org/papers/f556afbc97e18e1c1294d98e0f7ff99f/status.svg)](http://joss.theoj.org/papers/f556afbc97e18e1c1294d98e0f7ff99f) [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/ECRL/ECNet/master/LICENSE.txt) [![Documentation Status](https://readthedocs.org/projects/ecnet/badge/?version=latest)](https://ecnet.readthedocs.io/en/latest/?badge=latest) -[![Build Status](https://dev.azure.com/uml-ecrl/package-management/_apis/build/status/ECRL.ECNet?branchName=master)](https://dev.azure.com/uml-ecrl/package-management/_build/latest?definitionId=1&branchName=master) ## Installation @@ -27,4 +26,4 @@ Additional dependencies (torch, sklearn, padelpy, alvadescpy, ecabc) will be ins git clone https://github.com/ecrl/ecnet cd ecnet - python setup.py install + pip install . diff --git a/ecnet/__init__.py b/ecnet/__init__.py index e2b068b..7d8f8d4 100644 --- a/ecnet/__init__.py +++ b/ecnet/__init__.py @@ -1,2 +1,4 @@ +import pkg_resources from .model import ECNet -__version__ = '4.1.1' + +__version__ = pkg_resources.get_distribution("ecnet").version diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c93d492 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +exclude = ["databases*", "paper*"] + +[tool.setuptools.package-data] +"*" = ["*.smiles", "*.target"] + +[tool.pytest.ini_options] +filterwarnings = [ + "ignore::DeprecationWarning", +] + +[project] +name = "ecnet" +version = "4.1.2" +authors = [ + { name="Travis Kessler", email="travis.j.kessler@gmail.com" }, +] +description = "Fuel property prediction using QSPR descriptors" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "torch==2.0.0", + "scikit-learn==1.2.2", + "padelpy==0.1.15", + "alvadescpy==0.1.2", + "ecabc==3.0.0" +] +classifiers = [ + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] + +[project.urls] +"Homepage" = "https://github.com/ecrl/ecnet" +"Bug Tracker" = "https://github.com/ecrl/ecnet/issues" \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index a8cdbca..0000000 --- a/setup.py +++ /dev/null @@ -1,25 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name='ecnet', - version='4.1.1', - description='Fuel property prediction using QSPR descriptors', - url='https://github.com/ecrl/ecnet', - author='Travis Kessler', - author_email='Travis_Kessler@student.uml.edu', - license='MIT', - packages=find_packages(), - install_requires=[ - 'torch==2.0.0', - 'scikit-learn==1.2.2', - 'padelpy==0.1.13', - 'alvadescpy==0.1.2', - 'ecabc==3.0.0' - ], - package_data={ - 'ecnet': [ - 'datasets/data/*' - ] - }, - zip_safe=False -) diff --git a/tests/test_all.py b/tests/test_all.py index 99d18cb..1a5b336 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,12 +1,12 @@ import torch -import unittest +import pytest import os from ecnet.datasets.structs import QSPRDataset, QSPRDatasetFromFile, QSPRDatasetFromValues -from ecnet.datasets.utils import _qspr_from_padel, _qspr_from_alvadesc +from ecnet.datasets.utils import _qspr_from_padel from ecnet.datasets.load_data import _open_smiles_file, _open_target_file, _get_prop_paths,\ - _DATA_PATH, _get_file_data, _load_set -from ecnet.callbacks import LRDecayLinear, Validator + _DATA_PATH, _get_file_data +from ecnet.callbacks import LRDecayLinear from ecnet import ECNet from ecnet.model import load_model from ecnet.tasks.feature_selection import select_rfr @@ -17,279 +17,224 @@ _BACKEND = 'padel' _N_DESC = 1875 _N_PROCESSES = 1 - - -class TestDatasetUtils(unittest.TestCase): - - def test_qspr_generation(self): - - print('UNIT TEST: QSPR generation') - smiles = ['CCC', 'CCCC', 'CCCCC'] - desc, keys = _qspr_from_padel(smiles) - self.assertEqual(len(keys), 1875) - self.assertEqual(len(desc), 3) - for d in desc: - self.assertEqual(len(d), 1875) - - -class TestDatasetLoading(unittest.TestCase): - - def test_open_smiles_file(self): - - print('UNIT TEST: Open .smiles file') - smiles = 'CCC\nCCCC\nCCCCC' - with open('_temp.smiles', 'w') as smi_file: - smi_file.write(smiles) - smi_file.close() - smiles = smiles.split('\n') - opened_smiles = _open_smiles_file('_temp.smiles') - self.assertEqual(len(smiles), len(opened_smiles)) - for i in range(len(smiles)): - self.assertEqual(smiles[i], opened_smiles[i]) - - def test_open_target_file(self): - - print('UNIT TEST: Open .target file') - target_vals = '3.0\n4.0\n5.0' - with open('_temp.target', 'w') as tar_file: - tar_file.write(target_vals) - tar_file.close() - target_vals = target_vals.split('\n') - target_vals = [[float(v)] for v in target_vals] - opened_targets = _open_target_file('_temp.target') - self.assertEqual(len(target_vals), len(opened_targets)) - for i in range(len(target_vals)): - self.assertEqual(target_vals[i], opened_targets[i]) - - def test_get_prop_paths(self): - - print('UNIT TEST: Get property filepaths') - for p in _PROPS: - smiles_fn, target_fn = _get_prop_paths(p) - self.assertEqual( - os.path.join(_DATA_PATH, '{}.smiles'.format(p)), smiles_fn - ) - self.assertEqual( - os.path.join(_DATA_PATH, '{}.target'.format(p)), target_fn - ) - - def test_get_file_data(self): - - print('UNIT TEST: Get file data') - for p in _PROPS: - smiles, targets = _get_file_data(p) - self.assertEqual(len(smiles), len(targets)) - self.assertTrue(type(smiles[0]) == str) - self.assertTrue(type(targets[0]) == list) - self.assertTrue(type(targets[0][0]) == float) - - def tearDown(self): - - if os.path.exists('_temp.smiles'): - os.remove('_temp.smiles') - if os.path.exists('_temp.target'): - os.remove('_temp.target') - - -class TestDatasetStructs(unittest.TestCase): - - def test_qsprdataset(self): - - print('UNIT TEST: Create QSPRDataset') - smiles = ['CCC', 'CCCC', 'CCCCC'] - targets = [[3.0], [4.0], [5.0]] - ds = QSPRDataset(smiles, targets, backend=_BACKEND) - self.assertEqual(len(ds.smiles), 3) - self.assertEqual(len(ds.target_vals), 3) - self.assertEqual(len(ds.target_vals[0]), len(targets[0])) - self.assertEqual(len(ds.desc_vals), 3) - self.assertEqual(len(ds.desc_vals[0]), _N_DESC) - self.assertEqual(type(ds.desc_vals), type(torch.tensor([]))) - self.assertEqual(len(ds.desc_names), _N_DESC) - - def test_qsprdatasetfromfile(self): - - print('UNIT TEST: Create QSPRDatasetFromFile') - smiles = 'CCC\nCCCC\nCCCCC' - with open('_temp.smiles', 'w') as smi_file: - smi_file.write(smiles) - smi_file.close() - smiles = smiles.split('\n') - targets = [[3.0], [4.0], [5.0]] - ds = QSPRDatasetFromFile('_temp.smiles', targets, backend=_BACKEND) - self.assertEqual(len(ds.smiles), 3) - self.assertEqual(len(ds.target_vals), 3) - self.assertEqual(len(ds.target_vals[0]), len(targets[0])) - self.assertEqual(len(ds.desc_vals), 3) - self.assertEqual(len(ds.desc_vals[0]), _N_DESC) - self.assertEqual(type(ds.desc_vals), type(torch.tensor([]))) - self.assertEqual(len(ds.desc_names), _N_DESC) - - def test_qsprdatasetfromvalues(self): - - print('UNIT TEST: Create QSPRDatasetFromValues') - desc_vals = [ - [0.0, 0.1, 0.2, 0.3], - [0.0, 0.2, 0.3, 0.1], - [0.1, 0.3, 0.0, 0.2] - ] - target_vals = [[1.0], [2.0], [3.0]] - ds = QSPRDatasetFromValues(desc_vals, target_vals) - self.assertEqual(len(ds.smiles), len(desc_vals)) - self.assertEqual(len(ds.desc_names), len(desc_vals[0])) - self.assertEqual(len(ds.desc_vals), len(desc_vals)) - self.assertEqual(len(ds.target_vals), len(target_vals)) - self.assertEqual(len(ds.target_vals[0]), len(target_vals[0])) - self.assertEqual(type(ds.desc_vals), type(torch.tensor([]))) - self.assertEqual(type(ds.target_vals), type(torch.tensor([]))) - - def tearDown(self): - - if os.path.exists('_temp.smiles'): - os.remove('_temp.smiles') - - -class TestCallbacks(unittest.TestCase): - - def test_lrlineardecay(self): - - print('UNIT TEST: LR Decay') - model = torch.nn.Sequential( - torch.nn.Linear(3, 5), - torch.nn.ReLU(), - torch.nn.Linear(5, 1) - ) - lr = 0.001 - lrd = 0.00001 - optim = torch.optim.Adam(model.parameters(), lr=lr) - linear_decay = LRDecayLinear(lr, lrd, optim) - reached_epoch = 0 - for epoch in range(10000): - if not linear_decay.on_epoch_begin(epoch): - break - reached_epoch += 1 - if reached_epoch > int(lr / lrd): - raise RuntimeError('Linear decay: epoch reached {}'.format(reached_epoch)) - - def test_validator(self): - - # I can't think of a good way to test this one, but it works in practice - return - - -class TestModel(unittest.TestCase): - - def test_construct(self): - - print('UNIT TEST: Construct model') - _INPUT_DIM = 3 - _OUTPUT_DIM = 1 - _HIDDEN_DIM = 5 - _N_HIDDEN = 2 - net = ECNet(_INPUT_DIM, _OUTPUT_DIM, _HIDDEN_DIM, _N_HIDDEN) - self.assertEqual(len(net.model), 2 + _N_HIDDEN) - self.assertEqual(net.model[0].in_features, _INPUT_DIM) - self.assertEqual(net.model[0].out_features, _HIDDEN_DIM) - self.assertEqual(net.model[-1].in_features, _HIDDEN_DIM) - self.assertEqual(net.model[-1].out_features, _OUTPUT_DIM) - for layer in net.model[1:-1]: - self.assertEqual(layer.in_features, _HIDDEN_DIM) - self.assertEqual(layer.out_features, _HIDDEN_DIM) - - def test_fit(self): - - print('UNIT TEST: Fit model') - _EPOCHS = 10 - net = ECNet(_N_DESC, 1, 512, 2) - smiles = ['CCC', 'CCCC', 'CCCCC'] - targets = [[3.0], [4.0], [5.0]] - tr_loss, val_loss = net.fit(smiles, targets, backend=_BACKEND, epochs=_EPOCHS) - self.assertEqual(len(tr_loss), len(val_loss)) - self.assertEqual(len(tr_loss), _EPOCHS) - - def test_save_load(self): - - print('UNIT TEST: Model save/load') - _EPOCHS = 10 - net = ECNet(_N_DESC, 1, 512, 2) - smiles = ['CCC', 'CCCC', 'CCCCC'] - targets = [[3.0], [4.0], [5.0]] - ds = QSPRDataset(smiles, targets, backend=_BACKEND) - tr_loss, val_loss = net.fit(dataset=ds, epochs=_EPOCHS) - with self.assertRaises(ValueError): - net.save('_test.badext') - net.save('_test.pt') - val_0 = net(ds[0]['desc_vals']) - with self.assertRaises(FileNotFoundError): - net = load_model('badfile.pt') - net = load_model('_test.pt') - val_0_new = net(ds[0]['desc_vals']) - self.assertEqual(val_0, val_0_new) - - def tearDown(self): - - if os.path.exists('_test.pt'): - os.remove('_test.pt') - - -class TestTasks(unittest.TestCase): - - def test_feature_selection(self): - - print('UNIT TEST: Feature selection') - smiles = ['CCC', 'CCCC', 'CCCCC'] - targets = [[3.0], [4.0], [5.0]] - ds = QSPRDataset(smiles, targets, backend=_BACKEND) - indices, importances = select_rfr(ds, total_importance=0.90) - self.assertTrue(len(indices) < _N_DESC) - self.assertEqual(len(indices), len(importances)) - self.assertEqual(importances, sorted(importances, reverse=True)) - for index in indices: - self.assertTrue(index < _N_DESC) - - def test_tune_batch_size(self): - - print('UNIT TEST: Tune batch size') - smiles = ['CCC', 'CCCC', 'CCCCCC'] - targets = [[3.0], [4.0], [6.0]] - ds_train = QSPRDataset(smiles, targets, backend=_BACKEND) - smiles = ['CCCCC'] - targets = [[5.0]] - ds_eval = QSPRDataset(smiles, targets, backend=_BACKEND) - model = ECNet(_N_DESC, 1, 5, 1) - res = tune_batch_size(1, 1, ds_train, ds_eval, _N_PROCESSES) - self.assertTrue(1 <= res['batch_size'] <= len(ds_train.target_vals)) - - def test_tune_model_architecture(self): - - print('UNIT TEST: Tune model architecture') - smiles = ['CCC', 'CCCC', 'CCCCCC'] - targets = [[3.0], [4.0], [6.0]] - ds_train = QSPRDataset(smiles, targets, backend=_BACKEND) - smiles = ['CCCCC'] - targets = [[5.0]] - ds_eval = QSPRDataset(smiles, targets, backend=_BACKEND) - model = ECNet(_N_DESC, 1, 5, 1) - res = tune_model_architecture(1, 1, ds_train, ds_eval, _N_PROCESSES) - for k in list(res.keys()): - self.assertTrue(res[k] >= CONFIG['architecture_params_range'][k][0]) - self.assertTrue(res[k] <= CONFIG['architecture_params_range'][k][1]) - - def test_tune_training_hps(self): - - print('UNIT TEST: Tune model hyper-parameters') - smiles = ['CCC', 'CCCC', 'CCCCCC'] - targets = [[3.0], [4.0], [6.0]] - ds_train = QSPRDataset(smiles, targets, backend=_BACKEND) - smiles = ['CCCCC'] - targets = [[5.0]] - ds_eval = QSPRDataset(smiles, targets, backend=_BACKEND) - res = tune_training_parameters(1, 1, ds_train, ds_eval, _N_PROCESSES) - for k in list(res.keys()): - self.assertTrue(res[k] >= CONFIG['training_params_range'][k][0]) - self.assertTrue(res[k] <= CONFIG['training_params_range'][k][1]) - - -if __name__ == '__main__': - - unittest.main() +_EPOCHS = 10 + +# dataset utils + +def test_dataset_utils(): + smiles = ['CCC', 'CCCC', 'CCCCC'] + desc, keys = _qspr_from_padel(smiles) + assert len(keys) == _N_DESC + assert len(desc) == 3 + for d in desc: + assert len(d) == _N_DESC + +# dataset loading + +def test_open_smiles_file(): + smiles = 'CCC\nCCCC\nCCCCC' + with open('_temp.smiles', 'w') as smi_file: + smi_file.write(smiles) + smi_file.close() + smiles = smiles.split('\n') + opened_smiles = _open_smiles_file('_temp.smiles') + assert len(smiles) == len(opened_smiles) + for i in range(len(smiles)): + assert smiles[i] == opened_smiles[i] + + +def test_open_target_file(): + print('UNIT TEST: Open .target file') + target_vals = '3.0\n4.0\n5.0' + with open('_temp.target', 'w') as tar_file: + tar_file.write(target_vals) + tar_file.close() + target_vals = target_vals.split('\n') + target_vals = [[float(v)] for v in target_vals] + opened_targets = _open_target_file('_temp.target') + assert len(target_vals) == len(opened_targets) + for i in range(len(target_vals)): + assert target_vals[i] == opened_targets[i] + + +def test_get_prop_paths(): + for p in _PROPS: + smiles_fn, target_fn = _get_prop_paths(p) + assert os.path.join(_DATA_PATH, f'{p}.smiles') == smiles_fn + assert os.path.join(_DATA_PATH, f'{p}.target') == target_fn + + +def test_get_file_data(): + for p in _PROPS: + smiles, targets = _get_file_data(p) + assert len(smiles) == len(targets) + assert type(smiles[0]) == str + assert type(targets[0]) == list + assert type(targets[0][0]) == float + +# dataset structures + +def test_qsprdataset(): + smiles = ['CCC', 'CCCC', 'CCCCC'] + targets = [[3.0], [4.0], [5.0]] + ds = QSPRDataset(smiles, targets, backend=_BACKEND) + assert len(ds.smiles) == len(smiles) + assert len(ds.target_vals) == len(targets) + assert len(ds.target_vals[0]) == len(targets[0]) + assert len(ds.desc_vals) == len(smiles) + assert len(ds.desc_vals[0]) == _N_DESC + assert type(ds.desc_vals) == type(torch.tensor([])) + assert len(ds.desc_names) == _N_DESC + + +def test_qsprdatasetfromfile(): + smiles = 'CCC\nCCCC\nCCCCC' + with open('_temp.smiles', 'w') as smi_file: + smi_file.write(smiles) + smi_file.close() + smiles = smiles.split('\n') + targets = [[3.0], [4.0], [5.0]] + ds = QSPRDatasetFromFile('_temp.smiles', targets, backend=_BACKEND) + assert len(ds.smiles) == len(smiles) + assert len(ds.target_vals) == len(targets) + assert len(ds.target_vals[0]) == len(targets[0]) + assert len(ds.desc_vals) == len(smiles) + assert len(ds.desc_vals[0]) == _N_DESC + assert type(ds.desc_vals) == type(torch.tensor([])) + assert len(ds.desc_names) == _N_DESC + + +def test_qsprdatasetfromvalues(): + desc_vals = [ + [0.0, 0.1, 0.2, 0.3], + [0.0, 0.2, 0.3, 0.1], + [0.1, 0.3, 0.0, 0.2] + ] + target_vals = [[1.0], [2.0], [3.0]] + ds = QSPRDatasetFromValues(desc_vals, target_vals) + assert len(ds.smiles) == len(desc_vals) + assert len(ds.desc_names) == len(desc_vals[0]) + assert len(ds.desc_vals) == len(desc_vals) + assert len(ds.target_vals) == len(target_vals) + assert len(ds.target_vals[0]) == len(target_vals[0]) + assert type(ds.desc_vals) == type(torch.tensor([])) + assert type(ds.target_vals) == type(torch.tensor([])) + +# callbacks + +def test_lrlineardecay(): + model = torch.nn.Sequential( + torch.nn.Linear(3, 5), + torch.nn.ReLU(), + torch.nn.Linear(5, 1) + ) + lr = 0.001 + lrd = 0.00001 + optim = torch.optim.Adam(model.parameters(), lr=lr) + linear_decay = LRDecayLinear(lr, lrd, optim) + reached_epoch = 0 + for epoch in range(10000): + if not linear_decay.on_epoch_begin(epoch): + break + reached_epoch += 1 + if reached_epoch > int(lr / lrd): + raise RuntimeError('Linear decay: epoch reached {}'.format(reached_epoch)) + + +def test_validator(): + # I can't think of a good way to test this one, but it works in practice + return + +# model + +def test_model_construct(): + _INPUT_DIM = 3 + _OUTPUT_DIM = 1 + _HIDDEN_DIM = 5 + _N_HIDDEN = 2 + net = ECNet(_INPUT_DIM, _OUTPUT_DIM, _HIDDEN_DIM, _N_HIDDEN) + assert len(net.model) == 2 + _N_HIDDEN + assert net.model[0].in_features == _INPUT_DIM + assert net.model[0].out_features == _HIDDEN_DIM + assert net.model[-1].in_features == _HIDDEN_DIM + assert net.model[-1].out_features == _OUTPUT_DIM + for layer in net.model[1:-1]: + assert layer.in_features == _HIDDEN_DIM + assert layer.out_features == _HIDDEN_DIM + + +def test_model_fit(): + net = ECNet(_N_DESC, 1, 512, 2) + smiles = ['CCC', 'CCCC', 'CCCCC'] + targets = [[3.0], [4.0], [5.0]] + tr_loss, val_loss = net.fit(smiles, targets, backend=_BACKEND, epochs=_EPOCHS) + assert len(tr_loss) == len(val_loss) + assert len(tr_loss) == _EPOCHS + + +def test_model_save_load(): + net = ECNet(_N_DESC, 1, 512, 2) + smiles = ['CCC', 'CCCC', 'CCCCC'] + targets = [[3.0], [4.0], [5.0]] + ds = QSPRDataset(smiles, targets, backend=_BACKEND) + tr_loss, val_loss = net.fit(dataset=ds, epochs=_EPOCHS) + with pytest.raises(ValueError): + net.save('_test.badext') + net.save('_test.pt') + val_0 = net(ds[0]['desc_vals']) + with pytest.raises(FileNotFoundError): + net = load_model('badfile.pt') + net = load_model('_test.pt') + val_0_new = net(ds[0]['desc_vals']) + assert val_0 == val_0_new + +# tasks + +def test_feature_selection(): + smiles = ['CCC', 'CCCC', 'CCCCC'] + targets = [[3.0], [4.0], [5.0]] + ds = QSPRDataset(smiles, targets, backend=_BACKEND) + indices, importances = select_rfr(ds, total_importance=0.90) + assert len(indices) < _N_DESC + assert len(indices) == len(importances) + assert importances == sorted(importances, reverse=True) + for index in indices: + assert index < _N_DESC + + +def test_tune_batch_size(): + smiles = ['CCC', 'CCCC', 'CCCCCC'] + targets = [[3.0], [4.0], [6.0]] + ds_train = QSPRDataset(smiles, targets, backend=_BACKEND) + smiles = ['CCCCC'] + targets = [[5.0]] + ds_eval = QSPRDataset(smiles, targets, backend=_BACKEND) + model = ECNet(_N_DESC, 1, 5, 1) + res = tune_batch_size(1, 1, ds_train, ds_eval, _N_PROCESSES) + assert 1 <= res['batch_size'] <= len(ds_train.target_vals) + + +def test_tune_model_architecture(): + smiles = ['CCC', 'CCCC', 'CCCCCC'] + targets = [[3.0], [4.0], [6.0]] + ds_train = QSPRDataset(smiles, targets, backend=_BACKEND) + smiles = ['CCCCC'] + targets = [[5.0]] + ds_eval = QSPRDataset(smiles, targets, backend=_BACKEND) + res = tune_model_architecture(1, 1, ds_train, ds_eval, _N_PROCESSES,) + for k in list(res.keys()): + assert res[k] >= CONFIG['architecture_params_range'][k][0] + assert res[k] <= CONFIG['architecture_params_range'][k][1] + + +def test_tune_training_hyperparams(): + smiles = ['CCC', 'CCCC', 'CCCCCC'] + targets = [[3.0], [4.0], [6.0]] + ds_train = QSPRDataset(smiles, targets, backend=_BACKEND) + smiles = ['CCCCC'] + targets = [[5.0]] + ds_eval = QSPRDataset(smiles, targets, backend=_BACKEND) + res = tune_training_parameters(1, 1, ds_train, ds_eval, _N_PROCESSES) + for k in list(res.keys()): + assert res[k] >= CONFIG['training_params_range'][k][0] + assert res[k] <= CONFIG['training_params_range'][k][1]