Skip to content
This repository was archived by the owner on Dec 8, 2024. It is now read-only.

Commit aa96b3e

Browse files
author
Silvia
authored
Develop (#94)
* Corrected small bug in predict function * Started updating so that model can be trained after it's been reloaded * Minor modifications * Updated model so one can predict from xyz and disabled shuffling in training because it leads to a problem with predictions * Fix for the problem of shuffling * Added some tests to make sure the predictions work * Fixed a tensorboard problem * The saving of the model doesn't cause an error if the directory already exists * Fixed a bug that made a test fail * Modified the name of a parameter * Made modifications to make the symmetry functions more numerically stable * Added a hack that makes ARMP work with fortran ACSF when there are padded representations. Currently works *ONLY* when there is one molecule for the whole data set. * corrected bug in score function for padded molecules * Changes that make the model work quickly even when there is padding. * Fixed discrepancies between fortran and TF acsf * Corrected bug in setting of ACSF parameters * Attempt at fixing issue #10 * another attempt at fixing #10 * Removed a pointless line * set-up * Added the graceful killer * Modifications which prevent installation from breaking on BC4 * Modification to add neural networks to qmlearn * Fix for issue #8 * Random comment * Started including the atomic model * Made the atomic neural network work * Fixed a bug with the indices * Now training and predictions don't use the default graph, to avoid problems * uncommented examples * Removed unique_elements in data class. This can be stored in the NN class, but I might reverse the change later * Made tensorflow an optional dependency. The reason for this approach is that pip would just auto install tensorflow and you might want the gpu version or your own compiled one.
* Made is_numeric non-private and removed legacy code * Added 1d array util function * Removed QML check and moved functions from utils to tf_utils * Support for linear models (no hidden layers) * fixed import bug in tf_utils * Added text to explain that you are scoring on training set * Restructure. But elements are still not working. Sorted elements * Moved documentation from init to class * Constant features will now be removed at fit/predict time * Moved get_batch_size back into utils, since it doesn't depend on tf * Made the NeuralNetwork class compliant with sklearn. There cannot be any transforms of the input data * Fixed tests that didn't pass * Fixed mistake in checks of set_classes() in ARMP * started fixing ARMP bugs for QM7 * Fixed bug in padding and added examples that give low errors * Attempted fix to make representations single precision * Hot fix for AtomScaler * Minor bug fixes * More bug fixes to make sure tests run * Fixed some tests that had failures * Reverted the fchl tests to original * Fixed path in acsf test * Re-added changes to tests * Modifications after code review
1 parent e8224b5 commit aa96b3e

23 files changed

+1748
-529
lines changed

docs/source/qml_examples/examples.ipynb

Lines changed: 76 additions & 34 deletions
Large diffs are not rendered by default.

examples/ARMP_1.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@
3939

4040
## ------------- ** Setting up the estimator ** ---------------
4141

42-
estimator = ARMP(iterations=10, representation='acsf', representation_params={"radial_rs": np.arange(0, 10, 1), "angular_rs": np.arange(0.5, 10.5, 1),
43-
"theta_s": np.arange(0, 5, 1)}, tensorboard=False)
42+
acsf_params = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
43+
estimator = ARMP(iterations=5000, representation_name='acsf', representation_params=acsf_params, tensorboard=False,
44+
learning_rate=0.075, l1_reg=0.0, l2_reg=0.0)
4445

4546
estimator.generate_compounds(filenames)
4647
estimator.set_properties(energies)

examples/ARMP_2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
## ------------- ** Setting up the estimator ** ---------------
4141

42-
estimator = ARMP(iterations=100, l2_reg=0.0)
42+
estimator = ARMP(iterations=3000, learning_rate=0.075, l1_reg=0.0, l2_reg=0.0, tensorboard=True, store_frequency=50)
4343

4444
estimator.set_representations(representations=descriptor)
4545
estimator.set_classes(zs)

examples/ARMP_3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
## ------------- ** Setting up the estimator ** ---------------
4141

42-
estimator = ARMP(iterations=150, l2_reg=0.0, learning_rate=0.005, hidden_layer_sizes=(40, 20, 10))
42+
estimator = ARMP(iterations=3000, learning_rate=0.075, l1_reg=0.0, l2_reg=0.0, tensorboard=True, store_frequency=50)
4343

4444
## ------------- ** Fitting to the data ** ---------------
4545

examples/ARMP_qm7.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""
2+
This example shows how to use ARMP to overfit 100 data-points for the QM7 data set. It uses the Atom Centred Symmetry
3+
functions as the representation.
4+
5+
This example takes about 3.5 min to run on a mac.
6+
"""
7+
8+
from qml.aglaia.aglaia import ARMP
9+
import glob
10+
import numpy as np
11+
import matplotlib.pyplot as plt
12+
from sklearn import model_selection as modsel
13+
14+
filenames = sorted(glob.glob("../test/qm7/*.xyz"))
15+
energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=[1])
16+
n_samples = len(filenames)
17+
print("%i files were loaded." % (n_samples))
18+
19+
acsf_params = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
20+
estimator = ARMP(iterations=6000, representation_name='acsf', representation_params=acsf_params, l1_reg=0.0, l2_reg=0.0,
21+
scoring_function="rmse", tensorboard=False, store_frequency=10, learning_rate=0.075)
22+
23+
estimator.set_properties(energies[:100])
24+
estimator.generate_compounds(filenames[:100])
25+
estimator.generate_representation(method="fortran")
26+
print("The shape of the representation is: %s" % (str(estimator.representation.shape)))
27+
28+
idx = list(range(100))
29+
30+
idx_train, idx_test = modsel.train_test_split(idx, test_size=0, random_state=42, shuffle=True)
31+
32+
estimator.fit(idx_train)
33+
34+
score = estimator.score(idx_train)
35+
print("The RMSE is %s kcal/mol." % (str(score)))
36+
37+
ene_pred = estimator.predict(idx_train)
38+
39+
# Plotting the predictions against the true values
40+
plt.scatter(energies[idx_train], ene_pred)
41+
plt.show()

examples/MRMP_1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242
## ------------- ** Setting up the estimator ** ---------------
4343

44-
estimator = MRMP(representation='slatm', representation_params={'slatm_dgrid2': 0.06, 'slatm_dgrid1': 0.06})
44+
estimator = MRMP(representation_name='slatm', representation_params={'slatm_dgrid2': 0.06, 'slatm_dgrid1': 0.06})
4545

4646
estimator.generate_compounds(filenames[:100])
4747
estimator.set_properties(energies[:100])

examples/qmlearn.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,64 @@ def pipelines():
213213
print("*** End pipelines examples ***")
214214
print()
215215

216+
def pipelines_2():
217+
"""
218+
Scikit learn pipeline with a molecular neural network
219+
"""
220+
221+
print("\n *** Begin pipelines example with molecular Neural Network ***")
222+
223+
data = qmlearn.Data("../test/qm7/*.xyz")
224+
energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
225+
data.set_energies(energies)
226+
227+
# Create model
228+
model = sklearn.pipeline.make_pipeline(
229+
qmlearn.preprocessing.AtomScaler(data),
230+
qmlearn.representations.CoulombMatrix(),
231+
qmlearn.models.NeuralNetwork(iterations=500, batch_size=50, learning_rate=0.005),
232+
)
233+
234+
indices = np.arange(1000)
235+
np.random.shuffle(indices)
236+
237+
model.fit(indices[:100])
238+
239+
# Score on the TRAINING set, since you won't get good predictions in 500 iterations
240+
scores = model.score(indices[:100])
241+
print("Negative MAE:", scores)
242+
243+
print("*** End pipelines example with molecular Neural Network *** \n")
244+
245+
def pipelines_3():
246+
"""
247+
Scikit learn pipeline with an atomic neural network
248+
"""
249+
250+
print("\n *** Begin pipelines example with atomic Neural Network ***")
251+
252+
data = qmlearn.Data("../test/qm7/*.xyz")
253+
energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
254+
data.set_energies(energies)
255+
256+
# Create model
257+
model = sklearn.pipeline.make_pipeline(
258+
qmlearn.preprocessing.AtomScaler(data),
259+
qmlearn.representations.AtomCenteredSymmetryFunctions(),
260+
qmlearn.models.NeuralNetwork(iterations=500, batch_size=50, learning_rate=0.005),
261+
)
262+
263+
indices = np.arange(1000)
264+
np.random.shuffle(indices)
265+
266+
model.fit(indices[:100])
267+
268+
# Score on the TRAINING set, since you won't get good predictions in 500 iterations
269+
scores = model.score(indices[:100])
270+
print("Negative MAE:", scores)
271+
272+
print("*** End pipelines example with atomic Neural Network *** \n")
273+
216274
def cross_validation():
217275
"""
218276
Doing cross validation with qmlearn
@@ -285,3 +343,5 @@ def cross_validation():
285343
models()
286344
pipelines()
287345
cross_validation()
346+
pipelines_2()
347+
pipelines_3()

0 commit comments

Comments
 (0)